date:20160427

[PATCH net-next 3/4] Documentation: Bindings: Update DT binding for hns dsaf node

2016-04-27 Thread Yisen Zhuang

This patch changes property port-id to reg in dsaf port node,
removes property cpld-ctrl-reg, and fixes some typos.

Signed-off-by: Yisen Zhuang 
---
 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt | 28 ++
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt 
b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index 5ccd4f0..d4b7f2e 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -13,10 +13,10 @@ Required properties:
 - interrupts: should contain the DSA Fabric and rcb interrupt.
 - reg: specifies base physical address(es) and size of the device registers.
   The first region is external interface control register base and 
size(optional,
-  only be used when subctrl-syscon is not exists). It is recommended using
+  only used when subctrl-syscon does not exist). It is recommended using
   subctrl-syscon rather than this address.
-  The second region is SerDes base register and size(optional, only be used 
when
-  serdes-syscon in port node is not exists. It is recommended using
+  The second region is SerDes base register and size(optional, only used when
+  serdes-syscon in port node does not exist). It is recommended using
   serdes-syscon rather than this address.
   The third region is the PPE register base and size.
   The fourth region is dsa fabric base register and size. It is not required 
for
@@ -24,8 +24,8 @@ Required properties:
 - reg-names: may be ppe-base and(or) dsaf-base. It is used to find the
   corresponding reg's index.
 
-- phy-handle: phy handle of physicl port, 0 if not any phy device. It is 
optional
-  attribute. If port node is exists, phy-handle in each port node will be used.
+- phy-handle: phy handle of physical port, 0 if not any phy device. It is 
optional
+  attribute. If port node exists, phy-handle in each port node will be used.
   see ethernet.txt [1].
 - subctrl-syscon: is syscon handle for external interface control register.
 - reset-field-offset: is offset of reset field. Its value depends on the 
hardware
@@ -35,14 +35,12 @@ Required properties:
 
 - port: subnodes of dsaf. A dsaf node may contain several port nodes(Depending
   on mode of dsaf). Port node contain some attributes listed below:
-- port-id: is physical port index in one dsaf.
-- phy-handle: phy handle of physicl port. It is not required if there isn't
+- reg: is physical port index in one dsaf.
+- phy-handle: phy handle of physical port. It is not required if there isn't
   phy device. see ethernet.txt [1].
 - serdes-syscon: is syscon handle for SerDes register.
-- cpld-syscon: is syscon handle for cpld register. It is not required if there
-  isn't cpld device.
-- cpld-ctrl-reg: is cpld register offset. It is not required if there isn't
-  cpld-syscon.
+- cpld-syscon: is syscon handle + register offset pair for cpld register. It is
+  not required if there isn't cpld device.
 - port-rst-offset: is offset of reset field for each port in dsaf. Its value
   depends on the hardware user manual.
 - port-mode-offset: is offset of port mode field for each port in dsaf. Its
@@ -72,14 +70,14 @@ dsaf0: dsa@c700 {
desc-num = <1024>;
dma-coherent;
 
-   prot@0 {
-   port-id = 0;
+   port@0 {
+   reg = 0;
phy-handle = <&phy0>;
serdes-syscon = <&serdes>;
};
 
-   prot@1 {
-port-id = 1;
+   port@1 {
+reg = 1;
 serdes-syscon = <&serdes>;
 };
 };
-- 
1.9.1

[PATCH net-next 01/10] net: hns: add a new dsaf mode for debug port

2016-04-27 Thread Yisen Zhuang

From: huangdaode 

This patch adds a new dsaf mode named "single-port" to the hns driver.
This mode contains only one debug port. The patch also changes the way the
port type is identified, so that it is determined through the related dsaf.

Signed-off-by: Daode Huang 
Signed-off-by: Yisen Zhuang 
---
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c  |   2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  |   8 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c |  16 +--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h |   2 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c |   4 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c  |   6 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c  | 132 ++---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h  |   2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h  |   1 -
 9 files changed, 84 insertions(+), 89 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c 
b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 1591422..1e8bf22 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -96,7 +96,7 @@ static struct ring_pair_cb *hns_ae_get_base_ring_pair(
int q_num = rcb_comm->max_q_per_vf;
int vf_num = rcb_comm->max_vfn;
 
-   if (common_idx == HNS_DSAF_COMM_SERVICE_NW_IDX)
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
return &rcb_comm->ring_pair_cb[port * q_num * vf_num];
else
return &rcb_comm->ring_pair_cb[0];
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 10c367d..353b9e7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -249,7 +249,7 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb,
struct mac_entry_idx *old_entry;
 
old_entry = &mac_cb->addr_entry_idx[vmid];
-   if (dsaf_dev) {
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
mac_entry.in_vlan_id = old_entry->vlan_id;
mac_entry.in_port_num = mac_cb->mac_id;
@@ -289,7 +289,7 @@ int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
struct dsaf_drv_mac_single_dest_entry mac_entry;
 
-   if (dsaf_dev && addr) {
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev) && addr) {
memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
mac_entry.in_vlan_id = 0;/*vlan_id;*/
mac_entry.in_port_num = mac_cb->mac_id;
@@ -380,7 +380,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb 
*mac_cb,
if (mac_cb->mac_type == HNAE_PORT_DEBUG)
return 0;
 
-   if (dsaf_dev) {
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
mac_entry.in_vlan_id = vlan_id;
mac_entry.in_port_num = mac_cb->mac_id;
@@ -418,7 +418,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 
vmid, bool enable)
 
uc_mac_entry = &mac_cb->addr_entry_idx[vmid];
 
-   if (dsaf_dev)  {
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev))  {
memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
mac_entry.in_vlan_id = uc_mac_entry->vlan_id;
mac_entry.in_port_num = mac_cb->mac_id;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 8439f6d..7692853 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -28,6 +28,7 @@ const char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
[DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf",
[DSAF_MODE_DISABLE_6PORT_0VM] = "6port-16rss",
[DSAF_MODE_DISABLE_6PORT_16VM] = "6port-16vf",
+   [DSAF_MODE_DISABLE_SP] = "single-port",
 };
 
 int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
@@ -217,9 +218,7 @@ static void hns_dsaf_mix_def_qid_cfg(struct dsaf_device 
*dsaf_dev)
u32 q_id, q_num_per_port;
u32 i;
 
-   hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode,
-  HNS_DSAF_COMM_SERVICE_NW_IDX,
-  &max_vfn, &max_q_per_vf);
+   hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode, &max_vfn, &max_q_per_vf);
q_num_per_port = max_vfn * max_q_per_vf;
 
for (i = 0, q_id = 0; i < DSAF_SERVICE_NW_NUM; i++) {
@@ -239,9 +238,7 @@ static void hns_dsaf_inner_qid_cfg(struct dsaf_device 
*dsaf_dev)
if (AE_IS_VER1(dsaf_dev->dsaf_ver))
return;
 
-   hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode,
-  HNS_DSAF_COMM_SERVICE_NW_IDX,
-  &max_vfn, &max_q_per_vf);
+   hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode, &max_vfn, &max_q_per_vf);
q_num_per

[PATCH net-next 07/10] net: hns: separate debug dsaf device from service dsaf device

2016-04-27 Thread Yisen Zhuang

There are two kinds of dsaf device in hns: one is for service ports,
contains a crossbar, and can work under different modes; the other is for
the debug port and can only work under "single-port" mode. The old driver
declared a single dsaf device for both service ports and debug ports. This
patch separates it into three platform devices.

all port in one platform device(old):
CPU
 |
 |DSAF(one platform device)
--  /
|   |  |  |  | /
|  PPEPPEPPE |/
|   |  |  |  |   /
|   |  |  |  |  /
|crossbar  |  |  | /
|   |  |  |  |/
|---   |  |  |
||  |  |  |  |  |  |  |  |
||  |  |  |  |  |  |  |  |
|   MACMACMACMACMACMACMACMAC |
||  |  |  |  |  |  |  |  |
--
 |  |  |  |  |  |  |  |
PHYPHYPHYPHYPHYPHYPHYPHY

separate all ports to three platform(new):
 CPU
  |
---
| |   |
---   -
| |  ||   |   |   |   |
|PPE ||  PPE  |   |  PPE  |
| |  ||   |   |   |   |   |
| |  ||   |   |   |   |   |
|  crossbar  ||   |   |   |   |   |
| |  ||   |   |   |   |   |
|   --   ||   |   |   |   |   |
|   | | |  |  |  |   ||   |   |   |   |   |
|   | | |  |  |  |   ||   |   |   |   |   |
|  MAC   MAC   MACMACMACMAC  ||  MAC  |   |  MAC  |
|   | | |  |  |  |   ||   |   |   |   |   |
---   -
| | |  |  |  |\/  |/  |
   PHY   PHY   PHYPHYPHYPHY\  /  PHY  /  PHY
\/   /
 \  /   /
 DSAF(three platform device)

Signed-off-by: Daode Huang 
Signed-off-by: Yuzeng Zhuang 
---
 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt |  50 +--
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c  |  40 ++
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 153 +++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h  |   7 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 100 ++
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h |  12 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c |  73 ++
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c  |  47 +--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h  |   1 -
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c  |  53 ++-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h  |  15 +-
 11 files changed, 304 insertions(+), 247 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt 
b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index 291b3d8..01380a6 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -7,21 +7,40 @@ Required properties:
 - mode: dsa fabric mode string. only support one of dsaf modes like these:
"2port-64vf",
"6port-16rss",
-   "6port-16vf".
+   "6port-16vf",
+   "single-port".
 - interrupt-parent: the interrupt parent of this device.
 - interrupts: should contain the DSA Fabric and rcb interrupt.
 - reg: specifies base physical address(es) and size of the device registers.
-  The first region is external interface control register base and size.
-  The second region is SerDes base register and size.
+  The first region is external interface control register

[PATCH net-next 2/4] net: hns: change port-id property to reg property in dsaf port node

2016-04-27 Thread Yisen Zhuang

Indexes should generally be avoided. So we use reg rather than port-id to
index ports.

Signed-off-by: Yisen Zhuang 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 210ba89..611581f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -834,15 +834,15 @@ int hns_mac_init(struct dsaf_device *dsaf_dev)
struct fwnode_handle *child;
 
device_for_each_child_node(dsaf_dev->dev, child) {
-   ret = fwnode_property_read_u32(child, "port-id", &port_id);
+   ret = fwnode_property_read_u32(child, "reg", &port_id);
if (ret) {
dev_err(dsaf_dev->dev,
-   "get port-id fail, ret=%d!\n", ret);
+   "get reg fail, ret=%d!\n", ret);
return ret;
}
if (port_id >= max_port_num) {
dev_err(dsaf_dev->dev,
-   "port-id(%u) out of range!\n", port_id);
+   "reg(%u) out of range!\n", port_id);
return -EINVAL;
}
mac_cb = devm_kzalloc(dsaf_dev->dev, sizeof(*mac_cb),
-- 
1.9.1

[PATCH net-next 4/4] dts: hisi: update hns dst for changing property port-id to reg

2016-04-27 Thread Yisen Zhuang

Indexes should generally be avoided. This patch changes property port-id
to reg in dsaf port node.

Signed-off-by: Yisen Zhuang 
---
 arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi 
b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
index 7d62514..b6a130c 100644
--- a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
@@ -24,6 +24,8 @@ soc0: soc@0 {
};
 
dsaf0: dsa@c700 {
+   #address-cells = <1>;
+   #size-cells = <0>;
compatible = "hisilicon,hns-dsaf-v1";
mode = "6port-16rss";
interrupt-parent = <&mbigen_dsa>;
@@ -124,20 +126,20 @@ soc0: soc@0 {
dma-coherent;
 
port@0 {
-   port-id = <0>;
+   reg = <0>;
serdes-syscon = <&serdes_ctrl0>;
};
port@1 {
-   port-id = <1>;
+   reg = <1>;
serdes-syscon = <&serdes_ctrl0>;
};
port@4 {
-   port-id = <4>;
+   reg = <4>;
phy-handle = <&soc0_phy0>;
serdes-syscon = <&serdes_ctrl1>;
};
port@5 {
-   port-id = <5>;
+   reg = <5>;
phy-handle = <&soc0_phy1>;
serdes-syscon = <&serdes_ctrl1>;
};
-- 
1.9.1

[PATCH net-next 1/4] net: hns: remove cpld-ctrl-reg and add cell in the cpld-syscon property

2016-04-27 Thread Yisen Zhuang

Because the cpld-ctrl-reg property is an offset based on the cpld-syscon
property, we make it a cell in the cpld-syscon property.

Signed-off-by: Yisen Zhuang 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 26 ---
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 1c8fdd3..210ba89 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -647,6 +647,7 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
 {
struct device_node *np = mac_cb->dev->of_node;
struct regmap *syscon;
+   struct of_phandle_args cpld_args;
u32 ret;
 
mac_cb->link = false;
@@ -713,22 +714,23 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
mac_cb->mac_id);
}
 
-   syscon = syscon_node_to_regmap(
-   of_parse_phandle(to_of_node(mac_cb->fw_port),
-"cpld-syscon", 0));
-   if (IS_ERR_OR_NULL(syscon)) {
-   dev_dbg(mac_cb->dev, "no cpld-syscon found!\n");
+   ret = of_parse_phandle_with_fixed_args(to_of_node(mac_cb->fw_port),
+  "cpld-syscon", 1, 0, &cpld_args);
+   if (ret) {
+   dev_dbg(mac_cb->dev, "mac%d no cpld-syscon found.\n",
+   mac_cb->mac_id);
mac_cb->cpld_ctrl = NULL;
} else {
-   mac_cb->cpld_ctrl = syscon;
-   ret = fwnode_property_read_u32(mac_cb->fw_port,
-  "cpld-ctrl-reg",
-  &mac_cb->cpld_ctrl_reg);
-   if (ret) {
-   dev_err(mac_cb->dev, "get cpld-ctrl-reg fail!\n");
-   return ret;
+   syscon = syscon_node_to_regmap(cpld_args.np);
+   if (IS_ERR_OR_NULL(syscon)) {
+   dev_dbg(mac_cb->dev, "no cpld-syscon found!\n");
+   mac_cb->cpld_ctrl = NULL;
+   } else {
+   mac_cb->cpld_ctrl = syscon;
+   mac_cb->cpld_ctrl_reg = cpld_args.args[0];
}
}
+
return 0;
 }
 
-- 
1.9.1

[PATCH net-next 05/10] net: hns: add syscon operation for dsaf

2016-04-27 Thread Yisen Zhuang

From: Daode Huang 

This patch provides the read/write function for dsaf to
access the registers through syscon methods.

Signed-off-by: Daode Huang 
Signed-off-by: Yuzeng Zhuang 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index ed0043a..6a03c94 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -10,6 +10,7 @@
 #ifndef _DSAF_REG_H_
 #define _DSAF_REG_H_
 
+#include 
 #define HNS_DEBUG_RING_IRQ_IDX 0
 #define HNS_SERVICE_RING_IRQ_IDX 59
 #define HNSV2_SERVICE_RING_IRQ_IDX 25
@@ -989,6 +990,19 @@ static inline u32 dsaf_read_reg(u8 __iomem *base, u32 reg)
return readl(reg_addr + reg);
 }
 
+static inline void dsaf_write_syscon(struct regmap *base, u32 reg, u32 value)
+{
+   regmap_write(base, reg, value);
+}
+
+static inline u32 dsaf_read_syscon(struct regmap *base, u32 reg)
+{
+   unsigned int val;
+
+   regmap_read(base, reg, &val);
+   return val;
+}
+
 #define dsaf_read_dev(a, reg) \
dsaf_read_reg((a)->io_base, (reg))
 
-- 
1.9.1

[PATCH net-next 0/4] net: hns: update DT properties according to Rob's comments

2016-04-27 Thread Yisen Zhuang

There are some inappropriate property definitions in the hns DT. We update
the definitions according to Rob's review comments and fix some typos in the
binding.

For more details, please see individual patches.

MBR.

Yisen Zhuang (4):
  net: hns: remove cpld-ctrl-reg and add cell in the cpld-syscon
property
  net: hns: change port-id property to reg property in dsaf port node
  Documentation: Bindings: Update DT binding for hns dsaf node
  dts: hisi: update hns dst for changing property port-id to reg

 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt | 28 +--
 arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi   | 10 ---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 32 --
 3 files changed, 36 insertions(+), 34 deletions(-)

-- 
1.9.1

[PATCH net-next 04/10] net: hns: add attribute reset-field-offset for dsaf node

2016-04-27 Thread Yisen Zhuang

Add the subctrl reset offset for dsaf. This property is used to reset
xge/ge ports for different dsaf devices. If this attribute is not present,
the default value 0 will be used.

Signed-off-by: Daode Huang 
Signed-off-by: Yuzeng Zhuang 
---
 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt |  2 ++
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c |  8 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h |  1 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 40 +++---
 4 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt 
b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index ecacfa4..291b3d8 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -17,6 +17,8 @@ Required properties:
   The fourth region is dsa fabric base register and size.
   The fifth region is cpld base register and size, it is not required if do 
not use cpld.
 - phy-handle: phy handle of physicl port, 0 if not any phy device. see 
ethernet.txt [1].
+- reset-field-offset: is offset of reset field. Its value depends on the 
hardware
+  user manual.
 - buf-size: rx buffer size, should be 16-1024.
 - desc-num: number of description in TX and RX queue, should be 512, 1024, 
2048 or 4096.
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 7692853..b418d42 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -36,6 +36,7 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
int ret, i;
u32 desc_num;
u32 buf_size;
+   u32 reset_offset = 0;
const char *mode_str;
struct device_node *np = dsaf_dev->dev->of_node;
 
@@ -119,6 +120,13 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev)
}
dsaf_dev->desc_num = desc_num;
 
+   ret = of_property_read_u32(np, "reset-field-offset", &reset_offset);
+   if (ret < 0) {
+   dev_dbg(dsaf_dev->dev,
+   "get reset-field-offset fail, ret=%d!\r\n", ret);
+   }
+   dsaf_dev->reset_offset = reset_offset;
+
ret = of_property_read_u32(np, "buf-size", &buf_size);
if (ret < 0) {
dev_err(dsaf_dev->dev,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index a783019..47e768b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -281,6 +281,7 @@ struct dsaf_device {
 
u32 desc_num; /*  desc num per queue*/
u32 buf_size; /*  ring buffer size */
+   u32 reset_offset; /* reset field offset in sub sysctrl */
int buf_size_type; /* ring buffer size-type */
enum dsaf_mode dsaf_mode;/* dsaf mode  */
enum hal_dsaf_mode dsaf_en;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 8cb13d9..91e0382 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -110,7 +110,11 @@ void hns_dsaf_xge_srst_by_port(struct dsaf_device 
*dsaf_dev, u32 port, u32 val)
return;
 
reg_val |= RESET_REQ_OR_DREQ;
-   reg_val |= 0x2082082 << port;
+
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
+   reg_val |= 0x2082082 << port;
+   else
+   reg_val |= 0x2082082 << (dsaf_dev->reset_offset + 6);
 
if (val == 0)
reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
@@ -129,7 +133,11 @@ void hns_dsaf_xge_core_srst_by_port(struct dsaf_device 
*dsaf_dev,
if (port >= DSAF_XGE_NUM)
return;
 
-   reg_val |= XGMAC_TRX_CORE_SRST_M << port;
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
+   reg_val |= XGMAC_TRX_CORE_SRST_M << port;
+   else
+   reg_val |= XGMAC_TRX_CORE_SRST_M <<
+   (dsaf_dev->reset_offset + 6);
 
if (val == 0)
reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
@@ -173,8 +181,8 @@ void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, 
u32 port, u32 val)
   reg_val_1);
}
} else {
-   reg_val_1 = 0x15540 << (port - 6);
-   reg_val_2 = 0x100 << (port - 6);
+   reg_val_1 = 0x15540 << dsaf_dev->reset_offset;
+   reg_val_2 = 0x100 << dsaf_dev->reset_offset;
 
if (val == 0) {
dsaf_write_reg(dsaf_dev->sc_base,
@@ -201,7 +209,11 @@ void hns_ppe_srst_by_port(struct dsaf_device *dsaf_dev, 
u32 port, u32 val)
u32 reg_val = 0;
u32 reg_addr;
 
-   reg_val |= RESET_REQ_OR_DREQ << port;
+   if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
+   reg_val |= R

Re: [PATCH net 3/3] gre: receive also TEB packets for lwtunnels

2016-04-27 Thread Simon Horman

On Fri, Apr 22, 2016 at 07:44:08PM +0200, Jiri Benc wrote:
> For ipgre interfaces in collect metadata mode, receive also traffic with
> encapsulated Ethernet headers. The lwtunnel users are supposed to sort this
> out correctly. This allows to have mixed Ethernet + L3-only traffic on the
> same lwtunnel interface.
> 
> To keep backwards compatibility and prevent any surprises, gretap interfaces
> have priority in receiving packets with Ethernet headers.

Hi Jiri,

I have had some success wiring up Open vSwitch to use this patch for
transmit. However, I am wondering if something more is needed to allow
differentiation between packets with and without an L2 header present
on receive.

I had luck getting receive working with the following:

From: Simon Horman 
Date: Mon, 18 Apr 2016 17:48:47 +1000
Subject: [PATCH] gre: mark presence of l2 when receiving TEB packets for lwtunnels

There seems to be some way for receivers to differentiate between
packets received with and without an l2 header. The approach taken here
is to use a new mode bit in struct ip_tunnel_key.

Another approach might be to store tpi->proto in tunnel metadata,
though that would consume 16 bits somewhere and seems like overkill
at this point.

Signed-off-by: Simon Horman 
---
 include/net/ip_tunnels.h | 6 ++
 net/ipv4/ip_gre.c| 3 +++
 2 files changed, 9 insertions(+)

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index d916b4315903..cdf71ced429e 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -58,6 +58,12 @@ struct ip_tunnel_key {
 /* Flags for ip_tunnel_info mode. */
 #define IP_TUNNEL_INFO_TX  0x01/* represents tx tunnel parameters */
 #define IP_TUNNEL_INFO_IPV60x02/* key contains IPv6 addresses */
+#define IP_TUNNEL_INFO_L2_PRESENT  0x04/* Set on receive by tunnels 
that
+* may receive packets both with
+* and without an L2 header present
+* when a packet is received with
+* L2 header present.
+*/
 
 /* Maximum tunnel options length. */
 #define IP_TUNNEL_OPTS_MAX \
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0a4af2896a15..1290695fbc95 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -401,6 +401,9 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct 
tnl_ptk_info *tpi,
tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
if (!tun_dst)
return PACKET_REJECT;
+   if (tpi->proto == htons(ETH_P_TEB))
+   tun_dst->u.tun_info.mode |=
+   IP_TUNNEL_INFO_L2_PRESENT;
}
 
ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
-- 
2.7.0.rc3.207.g0ac5344

Re: [PATCH net-next 00/10] net: hns: add support of debug dsaf device

2016-04-27 Thread Yisen Zhuang

I am sorry for my misoperation, please ignore this series, thanks.

在 2016/4/28 15:01, Yisen Zhuang 写道:
> There are two kinds of dsaf device in hns, one is for service ports,
> contains crossbar in it, can work under different mode. Another is for
> debug port, only can work under single port mode. The current code only
> declared a dsaf device for both service ports and debug ports.It is not so
> readability. This patch separate it to three platform devices to make the
> code simple and more readability.
> 
> all port in one platform device(old):
> CPU
>  |
>  |DSAF(one platform device)
> --  /
> |   |  |  |  | /
> |  PPEPPEPPE |/
> |   |  |  |  |   /
> |   |  |  |  |  /
> |crossbar  |  |  | /
> |   |  |  |  |/
> |---   |  |  |
> ||  |  |  |  |  |  |  |  |
> ||  |  |  |  |  |  |  |  |
> |   MACMACMACMACMACMACMACMAC |
> ||  |  |  |  |  |  |  |  |
> --
>  |  |  |  |  |  |  |  |
> PHYPHYPHYPHYPHYPHYPHYPHY
> 
> separate all ports to three platform(new):
>  CPU
>   |
> ---
> | |   |
> ---   -
> | |  ||   |   |   |   |
> |PPE ||  PPE  |   |  PPE  |
> | |  ||   |   |   |   |   |
> | |  ||   |   |   |   |   |
> |  crossbar  ||   |   |   |   |   |
> | |  ||   |   |   |   |   |
> |   --   ||   |   |   |   |   |
> |   | | |  |  |  |   ||   |   |   |   |   |
> |   | | |  |  |  |   ||   |   |   |   |   |
> |  MAC   MAC   MACMACMACMAC  ||  MAC  |   |  MAC  |
> |   | | |  |  |  |   ||   |   |   |   |   |
> ---   -
> | | |  |  |  |\/  |/  |
>PHY   PHY   PHYPHYPHYPHY\  /  PHY  /  PHY
> \/   /
>  \  /   /
>  DSAF(three platform device)
> 
> I have tested this patchset on d02 board, it is compatible with the dts in
> d02 board.
> 
> For more details, please see individual patches.
> 
> MBR.
> 
> Daode Huang (3):
>   net: hns: set debug port irq index to 0
>   net: hns: add syscon operation for dsaf
>   net: hns: sort the header file by alphabetical order
> 
> Yisen Zhuang (6):
>   net: hns: add attribute port-idx-in-ae in enet node.
>   net: hns: add attribute reset-field-offset for dsaf node
>   net: hns: separate debug dsaf device from service dsaf device
>   net: hns: add attribute cpld_ctrl for dsaf port node
>   net: hns: add attribute port-rst-offset for dsaf port node
>   net: hns: add attribute port-mode-offset for dsaf port node
> 
> huangdaode (1):
>   net: hns: add a new dsaf mode for debug port
> 
>  .../devicetree/bindings/net/hisilicon-hns-dsaf.txt |  60 +-
>  .../devicetree/bindings/net/hisilicon-hns-nic.txt  |  30 ++-
>  drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c  |  73 ++-
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 231 
> ++---
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h  |  12 +-
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 141 -
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h |  16 +-
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 173 +--
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c  |  51 +
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h  |   1 -
>  drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c

Re: [PATCH v2 0/2] pegasus: correct buffer sizes

2016-04-27 Thread Johannes Berg

On Wed, 2016-04-27 at 16:26 +, David Laight wrote:
> From: Johannes Berg
> > 
> > Sent: 27 April 2016 10:44
> > On Wed, 2016-04-27 at 12:33 +0300, Petko Manolov wrote:
> > > 
> > > 
> > > Your guess turned out to be not so wild.;)All Pegasus devices are
> > > configured (by the driver) to append CRC at the end of each RX
> > > packet.However, the driver reports packet length that does not
> > > include it.
> > Interesting, then my guess was wrong though, since the length is
> > reported without it, or am I misunderstanding this?
> ...
> 
> It is even possible that the crc is written into the rx buffer even
> though the length the hardware reports excludes it.
> 

Right. I think a proper test would be to construct some kind of
ethernet-only frames, and check that they come out properly on the
other side. IP always has its own length field, so may be happy with
trailing garbage reported to the network stack, but doing raw ethernet
will let you check that the frame length is correct.

johannes

[PATCH net-next 03/10] net: hns: add attribute port-idx-in-ae in enet node.

2016-04-27 Thread Yisen Zhuang

This patch parses port-idx-in-ae in the enet node.
port-idx-in-ae: is the index of port provided by AE.
In NIC mode of DSAF, all 6 PHYs of service DSAF are taken as ethernet
ports to the CPU. The port-idx-in-ae can be 0 to 5. Here is the diagram:
+-+---+
|CPU  |
+-+-+-+---+-+-+-+-+-+-+
  ||   | | | | | |
   debug debug   service
   port  port port
   (0)   (0) (0-5)

In Switch mode of DSAF, all 6 PHYs of service DSAF are taken as physical
ports connected to a LAN switch, while the CPU side assumes it has a
single NIC connected to this switch. In this case, the port-idx-in-ae will
be 0 only.
+-+-+--+--+
|CPU  |
+-+-+-+-+-+-+-+-+-+-+-+-+-+
  || service| port(0)
debug debug  ++
port  port   |   switch   |
(0)   (0)+-+-+-+-+-+-++
  | | | | | |
 external port

When port-idx-in-ae does not exist, the old attribute port-id will be used
(only for compatibility; using port-id in new code is not recommended).

Signed-off-by: Daode Huang 
Signed-off-by: Yuzeng Zhuang 
---
 .../devicetree/bindings/net/hisilicon-hns-nic.txt  | 30 +++-
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c  | 33 --
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h  |  1 -
 drivers/net/ethernet/hisilicon/hns/hns_enet.c  | 17 ---
 drivers/net/ethernet/hisilicon/hns/hns_enet.h  |  3 ++
 5 files changed, 51 insertions(+), 33 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt 
b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
index e6a9d1c..965f6be 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
@@ -36,6 +36,34 @@ Required properties:
| | | | | |
   external port
 
+  This attribute is remained for compatible purpose. It is not recommended to
+  use it in new code.
+
+- port-idx-in-ae: is the index of port provided by AE.
+  In NIC mode of DSAF, all 6 PHYs of service DSAF are taken as ethernet ports
+  to the CPU. The port-idx-in-ae can be 0 to 5. Here is the diagram:
++-+---+
+|CPU  |
++-+-+-+---+-+-+-+-+-+-+
+  ||   | | | | | |
+   debug debug   service
+   port  port port
+   (0)   (0) (0-5)
+
+  In Switch mode of DSAF, all 6 PHYs of service DSAF are taken as physical
+  ports connect to a LAN Switch while the CPU side assume itself have one
+  single NIC connect to this switch. In this case, the port-idx-in-ae will
+  be 0 only.
++-+-+--+--+
+|CPU  |
++-+-+-+-+-+-+-+-+-+-+-+-+-+
+  || service| port(0)
+debug debug  ++
+port  port   |   switch   |
+(0)   (0)+-+-+-+-+-+-++
+  | | | | | |
+ external port
+
 - local-mac-address: mac addr of the ethernet interface
 
 Example:
@@ -43,6 +71,6 @@ Example:
ethernet@0{
compatible = "hisilicon,hns-nic-v1";
ae-handle = <&dsaf0>;
-   port-id = <0>;
+   port-idx-in-ae = <0>;
local-mac-address = [a2 14 e4 4b 56 76];
};
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c 
b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 1e8bf22..1c86336 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -29,25 +29,6 @@ static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle 
*handle)
return vf_cb->mac_cb;
 }
 
-/**
- * hns_ae_map_eport_to_dport - translate enet port id to dsaf port id
- * @port_id: enet port id
- *: debug port 0-1, service port 2 -7 (dsaf mode only 2)
- * return: dsaf port id
- *: service ports 0 - 5, debug port 6-7
- **/
-static int hns_ae_map_eport_to_dport(u32 port_id)
-{
-   int port_index;
-
-   if (port_id < DSAF_DEBUG_NW_NUM)
-   port_index = port_id + DSAF_SERVICE_PORT_NUM_PER_DSAF;
-   else
-   port_index = port_id - DSAF_DEBUG_NW_NUM;
-
-   return port_index;
-}
-
 static struct dsaf_device *hns_ae_get_dsaf_dev(struct hnae_ae_dev *dev)
 {
return container_of(dev, struct dsaf_device, ae_dev);
@@ -110,7 +91,6 @@ static struct ring_pair_cb *hns_ae_get_ring_pair(struct 
hnae_queue *q)
 struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev,
  u32 port_id)
 {
-   int port_idx;
int vfnum_per_port;
int qnum_per_vf;
int i;
@@ -120,11 +100,10 @@ struct hnae_handle *hns_ae_

[PATCH net-next 10/10] net: hns: add attribute port-mode-offset for dsaf port node

2016-04-27 Thread Yisen Zhuang

Port mode offset for each dsaf port is different. The current code is not
very readable. This patch adds a configuration property named port-mode-offset
to make the code simpler and more readable. If port-mode-offset doesn't
exist, the default value 0 will be used.

Signed-off-by: Daode Huang 
Signed-off-by: Yisen Zhuang 
---
 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt |  2 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 10 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h  |  1 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 44 ++
 4 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt 
b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index 72efe71..31d9d19 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -45,6 +45,8 @@ Required properties:
   cpld-syscon. 
 - port-rst-offset: is offset of reset field for each port in dsaf. Its value 
   depends on the hardware user manual. 
+- port-mode-offset: is offset of port mode field for each port in dsaf. Its 
+  value depends on the hardware user manual.
 
 
 [1] Documentation/devicetree/bindings/net/phy.txt
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 190477e..d825277 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -665,6 +665,7 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
mac_cb->max_frm = MAC_DEFAULT_MTU;
mac_cb->tx_pause_frm_time = MAC_DEFAULT_PAUSE_TIME;
mac_cb->port_rst_off = mac_cb->mac_id;
+   mac_cb->port_mode_off = 0;
 
/* if the dsaf node doesn't contain a port subnode,
 * parse the old dts to get phy-handle from dsaf node
@@ -704,6 +705,15 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
mac_cb->mac_id);
}
 
+   ret = fwnode_property_read_u32(mac_cb->fw_port,
+  "port-mode-offset",
+  &mac_cb->port_mode_off);
+   if (ret) {
+   dev_dbg(mac_cb->dev,
+   "mac%d port-mode-offset not found, use default 
value.\n",
+   mac_cb->mac_id);
+   }
+
syscon = syscon_node_to_regmap(
of_parse_phandle(to_of_node(mac_cb->fw_port),
 "cpld-syscon", 0));
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 7be7104..97ce9a7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -319,6 +319,7 @@ struct hns_mac_cb {
struct regmap *cpld_ctrl;
u32 cpld_ctrl_reg;
u32 port_rst_off;
+   u32 port_mode_off;
struct mac_entry_idx addr_entry_idx[DSAF_MAX_VM_NUM];
u8 sfp_prsnt;
u8 cpld_led_value;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index fc3056e..eecdb14 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -265,37 +265,31 @@ phy_interface_t hns_mac_get_phy_if(struct hns_mac_cb 
*mac_cb)
 {
u32 mode;
u32 reg;
-   u32 shift;
-   u32 phy_offset;
bool is_ver1 = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver);
int mac_id = mac_cb->mac_id;
-   phy_interface_t phy_if = PHY_INTERFACE_MODE_NA;
+   phy_interface_t phy_if;
 
-   if (is_ver1 && HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev)) {
-   phy_if = PHY_INTERFACE_MODE_SGMII;
-   } else if (mac_id >= 0 && mac_id <= 3 &&
-  !HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev)) {
-   reg = is_ver1 ? HNS_MAC_HILINK4_REG : HNS_MAC_HILINK4V2_REG;
-   mode = dsaf_read_sub(mac_cb->dsaf_dev, reg);
-   /* mac_id 0, 1, 2, 3 ---> hilink4 lane 0, 1, 2, 3 */
-   shift = is_ver1 ? 0 : mac_id;
-   if (dsaf_get_bit(mode, shift))
-   phy_if = PHY_INTERFACE_MODE_XGMII;
+   if (is_ver1) {
+   if (HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev))
+   return PHY_INTERFACE_MODE_SGMII;
+
+   if (mac_id >= 0 && mac_id <= 3)
+   reg = HNS_MAC_HILINK4_REG;
else
-   phy_if = PHY_INTERFACE_MODE_SGMII;
-   } else {
-   reg = is_ver1 ? HNS_MAC_HILINK3_REG : HNS_MAC_HILINK3V2_REG;
-   mode = dsaf_read_sub(mac_cb->dsaf_dev, reg);
-   /* mac_id 4, 5,---> hilink3 lane 2, 3
-* debug port 0(6), 1(7) ---> hilink3 lane 0, 1
-*/
-   phy_offset = mac_cb->dsaf_dev->reset_offset - 1;
-

[PATCH net-next 08/10] net: hns: add attribute cpld_ctrl for dsaf port node

2016-04-27 Thread Yisen Zhuang

This patch adds the attribute cpld_ctrl for the dsaf port node, parses the
syscon for mac_cb from the dts, and changes the cpld-related registers to be
accessed through syscon.

Signed-off-by: Daode Huang 
Signed-off-by: Yisen Zhuang 
---
 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt |  4 +++
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c  |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 38 --
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h  |  3 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c |  5 ---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h |  1 -
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 36 +++-
 7 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt 
b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index 01380a6..c1358ea 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -39,6 +39,10 @@ Required properties:
 - phy-handle: phy handle of physicl port. It is not required if there isn't 
   phy device. see ethernet.txt [1].
 - serdes-syscon: is syscon handle for SerDes register.
+- cpld-syscon: is syscon handle for cpld register. It is not required if there
+  isn't cpld device.
+- cpld-ctrl-reg: is cpld register offset. It is not required if there isn't
+  cpld-syscon.  
 
 
 [1] Documentation/devicetree/bindings/net/phy.txt
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c 
b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 58341da..7a757e8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -664,7 +664,7 @@ void hns_ae_update_led_status(struct hnae_handle *handle)
 
assert(handle);
mac_cb = hns_get_mac_cb(handle);
-   if (!mac_cb->cpld_vaddr)
+   if (!mac_cb->cpld_ctrl)
return;
hns_set_led_opt(mac_cb);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 39dd55f..f1a477d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -82,17 +82,6 @@ static enum mac_mode hns_get_enet_interface(const struct 
hns_mac_cb *mac_cb)
}
 }
 
-int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
-{
-   if (!mac_cb->cpld_vaddr)
-   return -ENODEV;
-
-   *sfp_prsnt = !dsaf_read_b((u8 *)mac_cb->cpld_vaddr
-   + MAC_SFP_PORT_OFFSET);
-
-   return 0;
-}
-
 void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status)
 {
struct mac_driver *mac_ctrl_drv;
@@ -658,6 +647,8 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
 {
struct device_node *np = mac_cb->dev->of_node;
struct regmap *syscon;
+   u32 ret;
+
mac_cb->link = false;
mac_cb->half_duplex = false;
mac_cb->speed = mac_phy_to_speed[mac_cb->phy_if];
@@ -702,6 +693,23 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
return -EINVAL;
}
mac_cb->serdes_ctrl = syscon;
+
+   syscon = syscon_node_to_regmap(
+   of_parse_phandle(to_of_node(mac_cb->fw_port),
+"cpld-syscon", 0));
+   if (IS_ERR_OR_NULL(syscon)) {
+   dev_dbg(mac_cb->dev, "no cpld-syscon found!\n");
+   mac_cb->cpld_ctrl = NULL;
+   } else {
+   mac_cb->cpld_ctrl = syscon;
+   ret = fwnode_property_read_u32(mac_cb->fw_port,
+  "cpld-ctrl-reg",
+  &mac_cb->cpld_ctrl_reg);
+   if (ret) {
+   dev_err(mac_cb->dev, "get cpld-ctrl-reg fail!\n");
+   return ret;
+   }
+   }
return 0;
 }
 
@@ -752,11 +760,6 @@ int hns_mac_get_cfg(struct dsaf_device *dsaf_dev, struct 
hns_mac_cb *mac_cb)
mac_cb->sys_ctl_vaddr = dsaf_dev->sc_base;
mac_cb->serdes_vaddr = dsaf_dev->sds_base;
 
-   if (dsaf_dev->cpld_base && !HNS_DSAF_IS_DEBUG(dsaf_dev)) {
-   mac_cb->cpld_vaddr = dsaf_dev->cpld_base +
-   mac_cb->mac_id * CPLD_ADDR_PORT_OFFSET;
-   cpld_led_reset(mac_cb);
-   }
mac_cb->sfp_prsnt = 0;
mac_cb->txpkt_for_led = 0;
mac_cb->rxpkt_for_led = 0;
@@ -781,6 +784,7 @@ int hns_mac_get_cfg(struct dsaf_device *dsaf_dev, struct 
hns_mac_cb *mac_cb)
if (ret)
return ret;
 
+   cpld_led_reset(mac_cb);
mac_cb->vaddr = hns_mac_get_vaddr(dsaf_dev, mac_cb, mac_mode_idx);
 
return 0;
@@ -957,7 +961,7 @@ void hns_set_led_opt(struct hns_mac_cb *mac_cb)
 int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb,

[PATCH net-next 09/10] net: hns: add attribute port-rst-offset for dsaf port node

2016-04-27 Thread Yisen Zhuang

The reset offset for each port in a dsaf is different. The current code is
not very readable. This patch adds a configuration property named
port-rst-offset to make the code simpler and more readable. If this attribute
doesn't exist, its default value is equal to the port index.

Signed-off-by: Yisen Zhuang 
---
 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt |  4 +++-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 10 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h  |  1 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 26 +++---
 4 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt 
b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index c1358ea..72efe71 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -42,7 +42,9 @@ Required properties:
 - cpld-syscon: is syscon handle for cpld register. It is not required if there
   isn't cpld device.
 - cpld-ctrl-reg: is cpld register offset. It is not required if there isn't
-  cpld-syscon.  
+  cpld-syscon. 
+- port-rst-offset: is offset of reset field for each port in dsaf. Its value 
+  depends on the hardware user manual. 
 
 
 [1] Documentation/devicetree/bindings/net/phy.txt
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index f1a477d..190477e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -664,6 +664,7 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
 
mac_cb->max_frm = MAC_DEFAULT_MTU;
mac_cb->tx_pause_frm_time = MAC_DEFAULT_PAUSE_TIME;
+   mac_cb->port_rst_off = mac_cb->mac_id;
 
/* if the dsaf node doesn't contain a port subnode,
 * parse the old dts to get phy-handle from dsaf node
@@ -694,6 +695,15 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
}
mac_cb->serdes_ctrl = syscon;
 
+   ret = fwnode_property_read_u32(mac_cb->fw_port,
+  "port-rst-offset",
+  &mac_cb->port_rst_off);
+   if (ret) {
+   dev_dbg(mac_cb->dev, 
+   "mac%d port-rst-offset not found, use default value.\n",
+   mac_cb->mac_id);
+   }
+
syscon = syscon_node_to_regmap(
of_parse_phandle(to_of_node(mac_cb->fw_port),
 "cpld-syscon", 0));
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 719816b..7be7104 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -318,6 +318,7 @@ struct hns_mac_cb {
struct regmap *serdes_ctrl;
struct regmap *cpld_ctrl;
u32 cpld_ctrl_reg;
+   u32 port_rst_off;
struct mac_entry_idx addr_entry_idx[DSAF_MAX_VM_NUM];
u8 sfp_prsnt;
u8 cpld_led_value;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 7e52ab5..fc3056e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -136,11 +136,7 @@ void hns_dsaf_xge_srst_by_port(struct dsaf_device 
*dsaf_dev, u32 port, u32 val)
return;
 
reg_val |= RESET_REQ_OR_DREQ;
-
-   if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
-   reg_val |= 0x2082082 << port;
-   else
-   reg_val |= 0x2082082 << (dsaf_dev->reset_offset + 6);
+   reg_val |= 0x2082082 << dsaf_dev->mac_cb[port]->port_rst_off;
 
if (val == 0)
reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
@@ -159,11 +155,7 @@ void hns_dsaf_xge_core_srst_by_port(struct dsaf_device 
*dsaf_dev,
if (port >= DSAF_XGE_NUM)
return;
 
-   if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
-   reg_val |= XGMAC_TRX_CORE_SRST_M << port;
-   else
-   reg_val |= XGMAC_TRX_CORE_SRST_M <<
-   (dsaf_dev->reset_offset + 6);
+   reg_val |= XGMAC_TRX_CORE_SRST_M << 
dsaf_dev->mac_cb[port]->port_rst_off;
 
if (val == 0)
reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
@@ -177,17 +169,19 @@ void hns_dsaf_ge_srst_by_port(struct dsaf_device 
*dsaf_dev, u32 port, u32 val)
 {
u32 reg_val_1;
u32 reg_val_2;
+   u32 port_rst_off;
 
if (port >= DSAF_GE_NUM)
return;
 
if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
reg_val_1  = 0x1 << port;
+   port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off;
/* there is difference between V1 and V2 in register.*/
if (AE_IS_VER1(dsaf_dev->dsaf_ver))
-

[PATCH net-next 06/10] net: hns: sort the header file by alphabetical order

2016-04-27 Thread Yisen Zhuang

From: Daode Huang 

This patch sorts the included header files in alphabetical order.

Signed-off-by: Daode Huang 
Signed-off-by: Yuzeng Zhuang 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 12 ++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 12 ++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c |  4 ++--
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 353b9e7..3730385 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -7,18 +7,18 @@
  * (at your option) any later version.
  */
 
-#include 
-#include 
 #include 
-#include 
-#include 
 #include 
-#include 
+#include 
+#include 
+#include 
 #include 
 #include 
+#include 
+#include 
 
-#include "hns_dsaf_misc.h"
 #include "hns_dsaf_main.h"
+#include "hns_dsaf_misc.h"
 #include "hns_dsaf_rcb.h"
 
 #define MAC_EN_FLAG_V  0xada0328
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index b418d42..98e0e83 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -7,22 +7,22 @@
  * (at your option) any later version.
  */
 
-#include 
-#include 
+#include 
 #include 
 #include 
+#include 
+#include 
 #include 
-#include 
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 
+#include "hns_dsaf_mac.h"
 #include "hns_dsaf_main.h"
-#include "hns_dsaf_rcb.h"
 #include "hns_dsaf_ppe.h"
-#include "hns_dsaf_mac.h"
+#include "hns_dsaf_rcb.h"
 
 const char *g_dsaf_mode_match[DSAF_MODE_MAX] = {
[DSAF_MODE_DISABLE_2PORT_64VM] = "2port-64vf",
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 91e0382..67c8b9e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -7,10 +7,10 @@
  * (at your option) any later version.
  */
 
-#include "hns_dsaf_misc.h"
 #include "hns_dsaf_mac.h"
-#include "hns_dsaf_reg.h"
+#include "hns_dsaf_misc.h"
 #include "hns_dsaf_ppe.h"
+#include "hns_dsaf_reg.h"
 
 void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status,
  u16 speed, int data)
-- 
1.9.1

[PATCH net-next 00/10] net: hns: add support of debug dsaf device

2016-04-27 Thread Yisen Zhuang

There are two kinds of dsaf device in hns: one is for service ports,
contains a crossbar, and can work in different modes; the other is for
debug ports and can only work in single-port mode. The current code only
declares one dsaf device for both service ports and debug ports, which is not
very readable. This patch separates it into three platform devices to make the
code simpler and more readable.

all port in one platform device(old):
CPU
 |
 |DSAF(one platform device)
--  /
|   |  |  |  | /
|  PPEPPEPPE |/
|   |  |  |  |   /
|   |  |  |  |  /
|crossbar  |  |  | /
|   |  |  |  |/
|---   |  |  |
||  |  |  |  |  |  |  |  |
||  |  |  |  |  |  |  |  |
|   MACMACMACMACMACMACMACMAC |
||  |  |  |  |  |  |  |  |
--
 |  |  |  |  |  |  |  |
PHYPHYPHYPHYPHYPHYPHYPHY

separate all ports to three platform(new):
 CPU
  |
---
| |   |
---   -
| |  ||   |   |   |   |
|PPE ||  PPE  |   |  PPE  |
| |  ||   |   |   |   |   |
| |  ||   |   |   |   |   |
|  crossbar  ||   |   |   |   |   |
| |  ||   |   |   |   |   |
|   --   ||   |   |   |   |   |
|   | | |  |  |  |   ||   |   |   |   |   |
|   | | |  |  |  |   ||   |   |   |   |   |
|  MAC   MAC   MACMACMACMAC  ||  MAC  |   |  MAC  |
|   | | |  |  |  |   ||   |   |   |   |   |
---   -
| | |  |  |  |\/  |/  |
   PHY   PHY   PHYPHYPHYPHY\  /  PHY  /  PHY
\/   /
 \  /   /
 DSAF(three platform device)

I have tested this patchset on d02 board, it is compatible with the dts in
d02 board.

For more details, please see individual patches.

MBR.

Daode Huang (3):
  net: hns: set debug port irq index to 0
  net: hns: add syscon operation for dsaf
  net: hns: sort the header file by alphabetical order

Yisen Zhuang (6):
  net: hns: add attribute port-idx-in-ae in enet node.
  net: hns: add attribute reset-field-offset for dsaf node
  net: hns: separate debug dsaf device from service dsaf device
  net: hns: add attribute cpld_ctrl for dsaf port node
  net: hns: add attribute port-rst-offset for dsaf port node
  net: hns: add attribute port-mode-offset for dsaf port node

huangdaode (1):
  net: hns: add a new dsaf mode for debug port

 .../devicetree/bindings/net/hisilicon-hns-dsaf.txt |  60 +-
 .../devicetree/bindings/net/hisilicon-hns-nic.txt  |  30 ++-
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c  |  73 ++-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  | 231 ++---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h  |  12 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 141 -
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h |  16 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 173 +--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c  |  51 +
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h  |   1 -
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c  | 187 +++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h  |   2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h  |  38 ++--
 drivers/net/ethernet/hisilicon/hns/hns_enet.c  |  17 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.h  |   3 +
 15 file

[PATCH net-next 02/10] net: hns: set debug port irq index to 0

2016-04-27 Thread Yisen Zhuang

From: Daode Huang 

As debug ports are moved from the service dsaf to the debug dsaf,
their interrupt offset should start from 0. So this patch
redefines the offset index of debug ports.

Signed-off-by: Daode Huang 
Signed-off-by: Yuzeng Zhuang 
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 8 +---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h | 5 +
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 121ba4e..054f391 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -445,20 +445,14 @@ static int hns_rcb_get_port_in_comm(
 
 #define SERVICE_RING_IRQ_IDX(v1) \
((v1) ? HNS_SERVICE_RING_IRQ_IDX : HNSV2_SERVICE_RING_IRQ_IDX)
-#define DEBUG_RING_IRQ_IDX(v1) \
-   ((v1) ? HNS_DEBUG_RING_IRQ_IDX : HNSV2_DEBUG_RING_IRQ_IDX)
-#define DEBUG_RING_IRQ_OFFSET(v1) \
-   ((v1) ? HNS_DEBUG_RING_IRQ_OFFSET : HNSV2_DEBUG_RING_IRQ_OFFSET)
 static int hns_rcb_get_base_irq_idx(struct rcb_common_cb *rcb_common)
 {
-   int comm_index = rcb_common->comm_index;
bool is_ver1 = AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver);
 
if (!HNS_DSAF_IS_DEBUG(rcb_common->dsaf_dev))
return SERVICE_RING_IRQ_IDX(is_ver1);
else
-   return  DEBUG_RING_IRQ_IDX(is_ver1) +
-   (comm_index - 1) * DEBUG_RING_IRQ_OFFSET(is_ver1);
+   return  HNS_DEBUG_RING_IRQ_IDX;
 }
 
 #define RCB_COMM_BASE_TO_RING_BASE(base, ringid)\
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h 
b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index cffd244..8782608 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -10,12 +10,9 @@
 #ifndef _DSAF_REG_H_
 #define _DSAF_REG_H_
 
-#define HNS_DEBUG_RING_IRQ_IDX 55
+#define HNS_DEBUG_RING_IRQ_IDX 0
 #define HNS_SERVICE_RING_IRQ_IDX 59
-#define HNS_DEBUG_RING_IRQ_OFFSET 2
-#define HNSV2_DEBUG_RING_IRQ_IDX 409
 #define HNSV2_SERVICE_RING_IRQ_IDX 25
-#define HNSV2_DEBUG_RING_IRQ_OFFSET 9
 
 #define DSAF_MAX_PORT_NUM_PER_CHIP 8
 #define DSAF_SERVICE_PORT_NUM_PER_DSAF 6
-- 
1.9.1

Re: [RFC PATCH V2 2/2] vhost: device IOTLB API

2016-04-27 Thread Jason Wang



On 04/27/2016 07:45 PM, Michael S. Tsirkin wrote:
> On Fri, Mar 25, 2016 at 10:34:34AM +0800, Jason Wang wrote:
>> This patch tries to implement a device IOTLB for vhost. This could be
>> used for co-operation with a userspace (qemu) implementation of DMA
>> remapping.
>>
>> The idea is simple. When vhost meets an IOTLB miss, it will request
>> the assistance of userspace to do the translation, this is done
>> through:
>>
>> - Fill the translation request in a preset userspace address (This
>>   address is set through ioctl VHOST_SET_IOTLB_REQUEST_ENTRY).
>> - Notify userspace through eventfd (This eventfd was set through ioctl
>>   VHOST_SET_IOTLB_FD).
> Why use an eventfd for this?

The aim is to implement the API all through ioctls.

>  We use them for interrupts because
> that happens to be what kvm wants, but here - why don't we
> just add a generic support for reading out events
> on the vhost fd itself?

I've considered this approach, but what are the advantages of this? I mean,
it looks like all other ioctls could be done through vhost fd
reading/writing too.

>
>> - device IOTLB were started and stopped through VHOST_RUN_IOTLB ioctl
>>
>> When userspace finishes the translation, it will update the vhost
>> IOTLB through VHOST_UPDATE_IOTLB ioctl. Userspace is also in charge of
>> snooping the IOTLB invalidation of IOMMU IOTLB and use
>> VHOST_UPDATE_IOTLB to invalidate the possible entry in vhost.
> There's one problem here, and that is that VQs still do not undergo
> translation.  In theory VQ could be mapped in such a way
> that it's not contiguous in userspace memory.

I'm not sure I get the issue; the current vhost API supports setting
desc_user_addr, used_user_addr and avail_user_addr independently. So it
looks ok? If not, it doesn't look like a problem with the device IOTLB API itself.

>
>
>> Signed-off-by: Jason Wang 
> What limits amount of entries that kernel keeps around?

It depends on guest working set I think. Looking at
http://dpdk.org/doc/guides/linux_gsg/sys_reqs.html:

- For 2MB page size in guest, it suggests hugepages=1024
- For 1GB page size, it suggests a hugepages=4

So I choose 2048 to make sure it can cover this.

> Do we want at least a mod parameter for this?

Maybe.

>
>> ---
>>  drivers/vhost/net.c|   6 +-
>>  drivers/vhost/vhost.c  | 301 
>> +++--
>>  drivers/vhost/vhost.h  |  17 ++-
>>  fs/eventfd.c   |   3 +-
>>  include/uapi/linux/vhost.h |  35 ++
>>  5 files changed, 320 insertions(+), 42 deletions(-)
>>

[...]

>> +struct vhost_iotlb_entry {
>> +__u64 iova;
>> +__u64 size;
>> +__u64 userspace_addr;
> Alignment requirements?

The API does not require any alignment. Will add a comment for this.

>
>> +struct {
>> +#define VHOST_ACCESS_RO  0x1
>> +#define VHOST_ACCESS_WO  0x2
>> +#define VHOST_ACCESS_RW  0x3
>> +__u8  perm;
>> +#define VHOST_IOTLB_MISS   1
>> +#define VHOST_IOTLB_UPDATE 2
>> +#define VHOST_IOTLB_INVALIDATE 3
>> +__u8  type;
>> +#define VHOST_IOTLB_INVALID0x1
>> +#define VHOST_IOTLB_VALID  0x2
>> +__u8  valid;
> why do we need this flag?

Useless, will remove.

>
>> +__u8  u8_padding;
>> +__u32 padding;
>> +} flags;
>> +};
>> +
>> +struct vhost_vring_iotlb_entry {
>> +unsigned int index;
>> +__u64 userspace_addr;
>> +};
>> +
>>  struct vhost_memory_region {
>>  __u64 guest_phys_addr;
>>  __u64 memory_size; /* bytes */
>> @@ -127,6 +153,15 @@ struct vhost_memory {
>>  /* Set eventfd to signal an error */
>>  #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct 
>> vhost_vring_file)
>>  
>> +/* IOTLB */
>> +/* Specify an eventfd file descriptor to signle on IOTLB miss */
> typo

Will fix it.

>
>> +#define VHOST_SET_VRING_IOTLB_CALL _IOW(VHOST_VIRTIO, 0x23, struct  \
>> +vhost_vring_file)
>> +#define VHOST_SET_VRING_IOTLB_REQUEST _IOW(VHOST_VIRTIO, 0x25, struct   \
>> +   vhost_vring_iotlb_entry)
>> +#define VHOST_UPDATE_IOTLB _IOW(VHOST_VIRTIO, 0x24, struct 
>> vhost_iotlb_entry)
>> +#define VHOST_RUN_IOTLB _IOW(VHOST_VIRTIO, 0x26, int)
>> +
> Is the assumption that userspace must dedicate a thread to running the iotlb? 
> I dislike this one.
> Please support asynchronous APIs at least optionally to make
> userspace make its own threading decisions.

Nope, my qemu patches do not use a dedicated thread. This API is used
to start or stop DMAR according to, e.g., whether the guest enables DMAR for
the Intel IOMMU.

>
>>  /* VHOST_NET specific defines */
>>  
>>  /* Attach virtio net ring to a raw socket, or tap device.
> Don't we need a feature bit for this?

Yes we need it. The feature bit is not considered in this patch and
looks like it was still under discussion. After we finalize it, I will add.

> Are we short on feature bits? If yes maybe it's time to add
> something like PROTOCOL_FE

Re: [PATCH v2] can: rcar_canfd: Add Renesas R-Car CAN FD driver

2016-04-27 Thread Oliver Hartkopp


Hello Ramesh,

please send out a new v3 patchset to trigger the process again :-)

Best regards,
Oliver

On 04/13/2016 08:25 AM, Ramesh Shanmugasundaram wrote:

HI Marc,

Gentle reminder!
Are you happy with the open comment's disposition? I can send a next version of 
patch if we have a closure on current set of comments.

Thanks,
Ramesh


-Original Message-
From: Ramesh Shanmugasundaram
Sent: 01 April 2016 13:49
To: Ramesh Shanmugasundaram ;
w...@grandegger.com; robh...@kernel.org; pawel.m...@arm.com;
mark.rutl...@arm.com; ijc+devicet...@hellion.org.uk; ga...@codeaurora.org;
cor...@lwn.net
Cc: linux-renesas-...@vger.kernel.org; devicet...@vger.kernel.org; linux-
c...@vger.kernel.org; netdev@vger.kernel.org; linux-...@vger.kernel.org;
geert+rene...@glider.be; Chris Paterson 
Subject: RE: [PATCH v2] can: rcar_canfd: Add Renesas R-Car CAN FD driver

Hi Marc,

Thanks for your time & review comments.


-Original Message-

(...)

+#define RCANFD_NAPI_WEIGHT 8   /* Rx poll quota */
+
+#define RCANFD_NUM_CHANNELS2
+#define RCANFD_CHANNELS_MASK   0x3 /* Two channels max */


(BIT(RCANFD_NUM_CHANNELS) - 1


OK




+
+/* Rx FIFO is a global resource of the controller. There are 8 such

FIFOs

+ * available. Each channel gets a dedicated Rx FIFO (i.e.) the
+ channel

(...)

+#define RCANFD_CMFIFO_CFDLC(x) (((x) & 0xf) << 28)
+#define RCANFD_CMFIFO_CFPTR(x) (((x) & 0xfff) << 16)
+#define RCANFD_CMFIFO_CFTS(x)  (((x) & 0xff) << 0)
+
+/* Global Test Config register */
+#define RCANFD_GTSTCFG_C0CBCE  BIT(0)
+#define RCANFD_GTSTCFG_C1CBCE  BIT(1)
+
+#define RCANFD_GTSTCTR_ICBCTME BIT(0)
+
+/* AFL Rx rules registers */
+#define RCANFD_AFLCFG_SETRNC0(x)   (((x) & 0xff) << 24)
+#define RCANFD_AFLCFG_SETRNC1(x)   (((x) & 0xff) << 16)


What about something like:

#define RCANFD_AFLCFG_SETRNC(n, x)  (((x) & 0xff) << (24 - n * 8))

This will save some if()s in the code


Nice :-). Will update.




+#define RCANFD_AFLCFG_GETRNC0(x)   (((x) >> 24) & 0xff)
+#define RCANFD_AFLCFG_GETRNC1(x)   (((x) >> 16) & 0xff)
+
+#define RCANFD_AFL_PAGENUM(entry)  ((entry) / 16)

(...)

+#define rcar_canfd_read(priv, offset)  \
+   readl(priv->base + (offset))
+#define rcar_canfd_write(priv, offset, val)\
+   writel(val, priv->base + (offset))
+#define rcar_canfd_set_bit(priv, reg, val) \
+   rcar_canfd_update(val, val, priv->base + (reg))
+#define rcar_canfd_clear_bit(priv, reg, val)   \
+   rcar_canfd_update(val, 0, priv->base + (reg))
+#define rcar_canfd_update_bit(priv, reg, mask, val)\
+   rcar_canfd_update(mask, val, priv->base + (reg))


please use static inline functions instead of defines.


OK.




+
+static void rcar_canfd_get_data(struct canfd_frame *cf,
+   struct rcar_canfd_channel *priv, u32 off)


Please use "struct rcar_canfd_channel *priv" as first argument, struct
canfd_frame *cf as second. Remove off, as the offset is already
defined by the channel.


I'll re-order priv, cf as you mentioned. I'll leave "off" arg as it is
because it is based on FIFO number of channel + mode (CAN only or CANFD
only). Otherwise, I will have to add another check inside this function
for mode.


+{
+   u32 i, lwords;
+
+   lwords = cf->len / sizeof(u32);
+   if (cf->len % sizeof(u32))
+   lwords++;


Use DIV_ROUND_UP() instead of open coding it.


Agreed. Thanks.




+   for (i = 0; i < lwords; i++)
+   *((u32 *)cf->data + i) =
+   rcar_canfd_read(priv, off + (i * sizeof(u32))); }
+
+static void rcar_canfd_put_data(struct canfd_frame *cf,
+   struct rcar_canfd_channel *priv, u32 off)


same here


Yes (same as _get_data)




+{
+   u32 i, j, lwords, leftover;
+   u32 data = 0;
+
+   lwords = cf->len / sizeof(u32);
+   leftover = cf->len % sizeof(u32);
+   for (i = 0; i < lwords; i++)
+   rcar_canfd_write(priv, off + (i * sizeof(u32)),
+*((u32 *)cf->data + i));


Here you don't convert the endianness...


Yes


+
+   if (leftover) {
+   u8 *p = (u8 *)((u32 *)cf->data + i);
+
+   for (j = 0; j < leftover; j++)
+   data |= p[j] << (j * 8);


...here you do an implicit endianness conversion. "data" is little
endian, while p[j] is big endian.


Not sure I got the question correctly.
Controller expectation of data bytes in 32bit register is bits[7:0] =
byte0, bits[15:8] = byte1 and so on - little endian.
For e.g. if cf->data points to byte stream H'112233445566 (cf->data[0] =
0x11), first rcar_canfd_write will write 0x44332211 value to register. Yes
the host cpu is assumed little endian. In leftover case, data will be
0x6655 - again little endian.
I think I should remove this leftover logic and just mask the unused bits
to zero as

Re: [RFC PATCH V2 1/2] vhost: convert pre sorted vhost memory array to interval tree

2016-04-27 Thread Jason Wang



On 04/27/2016 07:30 PM, Michael S. Tsirkin wrote:
> On Fri, Mar 25, 2016 at 10:34:33AM +0800, Jason Wang wrote:
>> > Current pre-sorted memory region array has some limitations for future
>> > device IOTLB conversion:
>> > 
>> > 1) need extra work for adding and removing a single region, and it's
>> >expected to be slow because of sorting or memory re-allocation.
>> > 2) need extra work of removing a large range which may intersect
>> >several regions with different size.
>> > 3) need trick for a replacement policy like LRU
>> > 
>> > To overcome the above shortcomings, this patch converts it to an interval
>> > tree, which can easily address the above issues with almost no extra
>> > work.
>> > 
>> > The patch could be used for:
>> > 
>> > - Extend the current API and only let the userspace to send diffs of
>> >   memory table.
>> > - Simplify Device IOTLB implementation.
> Does this affect performance at all?
>

In pktgen test, no difference.

Thanks

Re: [PATCH] vhost_net: stop polling socket during rx processing

2016-04-27 Thread Jason Wang



On 04/27/2016 07:28 PM, Michael S. Tsirkin wrote:
> On Tue, Apr 26, 2016 at 03:35:53AM -0400, Jason Wang wrote:
>> We don't stop polling socket during rx processing, this will lead to
>> unnecessary wakeups from underlying net devices (e.g.
>> sock_def_readable() from tun). Rx will be slowed down in this
>> way. This patch avoids this by stopping socket polling during rx
>> processing. A small drawback is that this introduces some overheads in
>> light load case because of the extra start/stop polling, but single
>> netperf TCP_RR does not notice any change. In a super heavy load case,
>> e.g using pktgen to inject packet to guest, we get about ~17%
>> improvement on pps:
>>
>> before: ~137 pkt/s
>> after:  ~150 pkt/s
>>
>> Signed-off-by: Jason Wang 
> Acked-by: Michael S. Tsirkin 
>
> There is one other possible enhancement: we actually have the wait queue
> lock taken in _wake_up, but we give it up only to take it again in the
> handler.
>
> It would be nicer to just remove the entry when we wake
> the vhost thread. Re-add it if required.
> I think that something like the below would give you the necessary API.
> Pls feel free to use it if you are going to implement a patch on top
> doing this - that's not a reason not to include this simple patch
> though.

Thanks, this looks useful, will give it a try.

>
> --->
>
> wait: add API to drop a wait_queue_t entry from wake up handler
>
> A wake up handler might want to remove its own wait queue entry to avoid
> future wakeups.  In particular, vhost has such a need.  As wait queue
> lock is already taken, all we need is an API to remove the entry without
> wait_queue_head_t which isn't currently accessible to wake up handlers.
>
> Signed-off-by: Michael S. Tsirkin 
>
> ---
>
> diff --git a/include/linux/wait.h b/include/linux/wait.h
> index 27d7a0a..9c6604b 100644
> --- a/include/linux/wait.h
> +++ b/include/linux/wait.h
> @@ -191,11 +191,17 @@ __add_wait_queue_tail_exclusive(wait_queue_head_t *q, 
> wait_queue_t *wait)
>  }
>  
>  static inline void
> -__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
> +__remove_wait_queue_entry(wait_queue_t *old)
>  {
>   list_del(&old->task_list);
>  }
>  
> +static inline void
> +__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
> +{
> + __remove_wait_queue_entry(old);
> +}
> +
>  typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
>  void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
>  void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void 
> *key);

[PATCH net-next 6/6] net: do not block BH while processing socket backlog

2016-04-27 Thread Eric Dumazet

Socket backlog processing is a major latency source.

With current TCP socket sk_rcvbuf limits, I have sampled __release_sock()
holding cpu for more than 5 ms, and packets being dropped by the NIC
once ring buffer is filled.

All users are now ready to be called from process context,
we can unblock BH and let interrupts be serviced faster.

cond_resched_softirq() could be removed, as it has no more user.

Signed-off-by: Eric Dumazet 
---
 net/core/sock.c | 22 --
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index e16a5db853c6..70744dbb6c3f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2019,33 +2019,27 @@ static void __release_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
 {
-   struct sk_buff *skb = sk->sk_backlog.head;
+   struct sk_buff *skb, *next;
 
-   do {
+   while ((skb = sk->sk_backlog.head) != NULL) {
sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
-   bh_unlock_sock(sk);
 
-   do {
-   struct sk_buff *next = skb->next;
+   spin_unlock_bh(&sk->sk_lock.slock);
 
+   do {
+   next = skb->next;
prefetch(next);
WARN_ON_ONCE(skb_dst_is_noref(skb));
skb->next = NULL;
sk_backlog_rcv(sk, skb);
 
-   /*
-* We are in process context here with softirqs
-* disabled, use cond_resched_softirq() to preempt.
-* This is safe to do because we've taken the backlog
-* queue private:
-*/
-   cond_resched_softirq();
+   cond_resched();
 
skb = next;
} while (skb != NULL);
 
-   bh_lock_sock(sk);
-   } while ((skb = sk->sk_backlog.head) != NULL);
+   spin_lock_bh(&sk->sk_lock.slock);
+   }
 
/*
 * Doing the zeroing here guarantee we can not loop forever
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 2/6] tcp: do not block bh during prequeue processing

2016-04-27 Thread Eric Dumazet

AFAIK, nothing in current TCP stack absolutely wants BH
being disabled once socket is owned by a thread running in
process context.

As mentioned in my prior patch ("tcp: give prequeue mode some care"),
processing a batch of packets might take time, better not block BH
at all.

Signed-off-by: Eric Dumazet 
---
 net/ipv4/tcp.c   |  4 
 net/ipv4/tcp_input.c | 30 ++
 2 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7f51389814e6..f8856b76f941 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1445,12 +1445,8 @@ static void tcp_prequeue_process(struct sock *sk)
 
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
 
-   /* RX process wants to run with disabled BHs, though it is not
-* necessary */
-   local_bh_disable();
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk_backlog_rcv(sk, skb);
-   local_bh_enable();
 
/* Clear memory counter. */
tp->ucopy.memory = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0eb31df8edfa..44e0f9f15f32 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4608,14 +4608,12 @@ static void tcp_data_queue(struct sock *sk, struct 
sk_buff *skb)
 
__set_current_state(TASK_RUNNING);
 
-   local_bh_enable();
if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, 
chunk)) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
eaten = (chunk == skb->len);
tcp_rcv_space_adjust(sk);
}
-   local_bh_disable();
}
 
if (eaten <= 0) {
@@ -5131,7 +5129,6 @@ static int tcp_copy_to_iovec(struct sock *sk, struct 
sk_buff *skb, int hlen)
int chunk = skb->len - hlen;
int err;
 
-   local_bh_enable();
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
else
@@ -5143,32 +5140,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct 
sk_buff *skb, int hlen)
tcp_rcv_space_adjust(sk);
}
 
-   local_bh_disable();
return err;
 }
 
-static __sum16 __tcp_checksum_complete_user(struct sock *sk,
-   struct sk_buff *skb)
-{
-   __sum16 result;
-
-   if (sock_owned_by_user(sk)) {
-   local_bh_enable();
-   result = __tcp_checksum_complete(skb);
-   local_bh_disable();
-   } else {
-   result = __tcp_checksum_complete(skb);
-   }
-   return result;
-}
-
-static inline bool tcp_checksum_complete_user(struct sock *sk,
-struct sk_buff *skb)
-{
-   return !skb_csum_unnecessary(skb) &&
-  __tcp_checksum_complete_user(sk, skb);
-}
-
 /* Does PAWS and seqno based validation of an incoming segment, flags will
  * play significant role here.
  */
@@ -5383,7 +5357,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff 
*skb,
}
}
if (!eaten) {
-   if (tcp_checksum_complete_user(sk, skb))
+   if (tcp_checksum_complete(skb))
goto csum_error;
 
if ((int)skb->truesize > sk->sk_forward_alloc)
@@ -5427,7 +5401,7 @@ no_ack:
}
 
 slow_path:
-   if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
+   if (len < (th->doff << 2) || tcp_checksum_complete(skb))
goto csum_error;
 
if (!th->ack && !th->rst && !th->syn)
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 4/6] udp: prepare for non BH masking at backlog processing

2016-04-27 Thread Eric Dumazet

UDP uses the generic socket backlog code, and this will soon
be changed to not disable BH when protocol is called back.

We need to use appropriate SNMP accessors.

Signed-off-by: Eric Dumazet 
---
 net/ipv4/udp.c | 4 ++--
 net/ipv6/udp.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 093284c5c03b..f67f52ba4809 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct 
sk_buff *skb)
 
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
-   __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+   UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
is_udplite);
-   __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+   UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1ba5a74ac18f..f911c63f79e6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -570,9 +570,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct 
sk_buff *skb)
 
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
-   __UDP6_INC_STATS(sock_net(sk),
+   UDP6_INC_STATS(sock_net(sk),
 UDP_MIB_RCVBUFERRORS, is_udplite);
-   __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+   UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 3/6] dccp: do not assume DCCP code is non preemptible

2016-04-27 Thread Eric Dumazet

DCCP uses the generic backlog code, and this will soon
be changed to not disable BH when protocol is called back.

Signed-off-by: Eric Dumazet 
---
 net/dccp/input.c   | 2 +-
 net/dccp/ipv4.c| 4 ++--
 net/dccp/ipv6.c| 4 ++--
 net/dccp/options.c | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 2437ecc13b82..ba347184bda9 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -359,7 +359,7 @@ send_sync:
goto discard;
}
 
-   __DCCP_INC_STATS(DCCP_MIB_INERRS);
+   DCCP_INC_STATS(DCCP_MIB_INERRS);
 discard:
__kfree_skb(skb);
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a8164272e0f4..5c7e413a3ae4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
bh_unlock_sock(ctl_sk);
 
if (net_xmit_eval(err) == 0) {
-   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
+   DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
}
 out:
 dst_release(dst);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 0f4eb4ea57a5..d176f4e66369 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
-   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
+   DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
}
 
diff --git a/net/dccp/options.c b/net/dccp/options.c
index b82b7ee9a1d2..74d29c56c367 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -253,7 +253,7 @@ out_nonsensical_length:
return 0;
 
 out_invalid_option:
-   __DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
+   DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
rc = DCCP_RESET_CODE_OPTION_ERROR;
 out_featneg_failed:
DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 1/6] tcp: do not assume TCP code is non preemptible

2016-04-27 Thread Eric Dumazet

We want to make the TCP stack preemptible, as draining prequeue
and backlog queues can take a lot of time.

Many SNMP updates were assuming that BH (and preemption) was disabled.

Need to convert some __NET_INC_STATS() calls to NET_INC_STATS()
and some __TCP_INC_STATS() to TCP_INC_STATS()

Before using this_cpu_ptr(net->ipv4.tcp_sk) in tcp_v4_send_reset()
and tcp_v4_send_ack(), we add an explicit preempt disabled section.

Signed-off-by: Eric Dumazet 
---
 net/ipv4/tcp.c   |  2 +-
 net/ipv4/tcp_cdg.c   | 20 +-
 net/ipv4/tcp_cubic.c | 20 +-
 net/ipv4/tcp_fastopen.c  | 12 +++---
 net/ipv4/tcp_input.c | 96 
 net/ipv4/tcp_ipv4.c  | 14 ---
 net/ipv4/tcp_minisocks.c |  2 +-
 net/ipv4/tcp_output.c|  7 ++--
 net/ipv4/tcp_recovery.c  |  4 +-
 net/ipv4/tcp_timer.c | 10 +++--
 net/ipv6/tcp_ipv6.c  | 12 +++---
 11 files changed, 102 insertions(+), 97 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 040f35e7efe0..7f51389814e6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3091,7 +3091,7 @@ void tcp_done(struct sock *sk)
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
 
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-   __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+   TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 3c00208c37f4..4e3007845888 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -155,11 +155,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 
ca->last_ack = now_us;
if (after(now_us, ca->round_start + base_owd)) {
-   __NET_INC_STATS(sock_net(sk),
-   
LINUX_MIB_TCPHYSTARTTRAINDETECT);
-   __NET_ADD_STATS(sock_net(sk),
-   LINUX_MIB_TCPHYSTARTTRAINCWND,
-   tp->snd_cwnd);
+   NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+   NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
return;
}
@@ -174,11 +174,11 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 125U);
 
if (ca->rtt.min > thresh) {
-   __NET_INC_STATS(sock_net(sk),
-   
LINUX_MIB_TCPHYSTARTDELAYDETECT);
-   __NET_ADD_STATS(sock_net(sk),
-   LINUX_MIB_TCPHYSTARTDELAYCWND,
-   tp->snd_cwnd);
+   NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYDETECT);
+   NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTDELAYCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
}
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 59155af9de5d..0ce946e395e1 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -402,11 +402,11 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->last_ack = now;
if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
ca->found |= HYSTART_ACK_TRAIN;
-   __NET_INC_STATS(sock_net(sk),
-   
LINUX_MIB_TCPHYSTARTTRAINDETECT);
-   __NET_ADD_STATS(sock_net(sk),
-   LINUX_MIB_TCPHYSTARTTRAINCWND,
-   tp->snd_cwnd);
+   NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINDETECT);
+   NET_ADD_STATS(sock_net(sk),
+ LINUX_MIB_TCPHYSTARTTRAINCWND,
+ tp->snd_cwnd);
tp->snd_ssthresh = tp->snd_cwnd;
}
}
@@ -423,11 +423,11 @@ static void hystart_update(struct sock *sk, u32 delay)
if (ca->curr_rtt > ca->delay_min +

[PATCH net-next 5/6] sctp: prepare for socket backlog behavior change

2016-04-27 Thread Eric Dumazet

sctp_inq_push() will soon be called without BH being blocked
when generic socket code flushes the socket backlog.

It is very possible SCTP can be converted to not rely on BH,
but this needs to be done by SCTP experts.

Signed-off-by: Eric Dumazet 
---
 net/sctp/inqueue.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index b335ffcef0b9..9d87bba0ff1d 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -89,10 +89,12 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk 
*chunk)
 * Eventually, we should clean up inqueue to not rely
 * on the BH related data structures.
 */
+   local_bh_disable();
list_add_tail(&chunk->list, &q->in_chunk_list);
if (chunk->asoc)
chunk->asoc->stats.ipackets++;
q->immediate.func(&q->immediate);
+   local_bh_enable();
 }
 
 /* Peek at the next chunk on the inqeue. */
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 0/6] net: make TCP preemptible

2016-04-27 Thread Eric Dumazet

Most of TCP stack assumed it was running from BH handler.

This is great for most things, as TCP behavior is very sensitive
to scheduling artifacts.

However, the prequeue and backlog processing are problematic,
as they need to be flushed with BH being blocked.

To cope with modern needs, TCP sockets have big sk_rcvbuf values,
in the order of 16 MB.
This means that backlog can hold thousands of packets, and things
like TCP coalescing or collapsing on this amount of packets can
lead to insane latency spikes, since BH are blocked for too long.

It is time to make UDP/TCP stacks preemptible.

Note that fast path still runs from BH handler.

Eric Dumazet (6):
  tcp: do not assume TCP code is non preemptible
  tcp: do not block bh during prequeue processing
  dccp: do not assume DCCP code is non preemptible
  udp: prepare for non BH masking at backlog processing
  sctp: prepare for socket backlog behavior change
  net: do not block BH while processing socket backlog

 net/core/sock.c  |  22 +++--
 net/dccp/input.c |   2 +-
 net/dccp/ipv4.c  |   4 +-
 net/dccp/ipv6.c  |   4 +-
 net/dccp/options.c   |   2 +-
 net/ipv4/tcp.c   |   6 +--
 net/ipv4/tcp_cdg.c   |  20 
 net/ipv4/tcp_cubic.c |  20 
 net/ipv4/tcp_fastopen.c  |  12 ++---
 net/ipv4/tcp_input.c | 126 +++
 net/ipv4/tcp_ipv4.c  |  14 --
 net/ipv4/tcp_minisocks.c |   2 +-
 net/ipv4/tcp_output.c|   7 ++-
 net/ipv4/tcp_recovery.c  |   4 +-
 net/ipv4/tcp_timer.c |  10 ++--
 net/ipv4/udp.c   |   4 +-
 net/ipv6/tcp_ipv6.c  |  12 ++---
 net/ipv6/udp.c   |   4 +-
 net/sctp/inqueue.c   |   2 +
 19 files changed, 124 insertions(+), 153 deletions(-)

-- 
2.8.0.rc3.226.g39d4020

Re: [PATCH net-next 2/7] net: rtnetlink: allow only one idx saving stats attribute

2016-04-27 Thread Roopa Prabhu

On 4/27/16, 9:18 AM, Nikolay Aleksandrov wrote:
> We can't allow more than one stats attribute which uses the local idx
> since the result will be a mess. This is a simple check to make sure
> only one is being used at a time. Later when the filter_mask's 32 bits
> are over we can switch to a bitmap.
>
> Signed-off-by: Nikolay Aleksandrov 
> ---
>  include/net/rtnetlink.h |  6 ++
>  net/core/rtnetlink.c| 17 +++--
>  2 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
> index 2f87c1ba13de..3f3b0b1b8722 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -150,4 +150,10 @@ int rtnl_nla_parse_ifla(struct nlattr **tb, const struct 
> nlattr *head, int len);
>  
>  #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
>  
> +/* at most one attribute which can save a local idx is allowed to be set
> + * IFLA_STATS_IDX_ATTR_MASK has all the idx saving attributes set and is
> + * used to check if more than one is being requested
> + */
> +#define IFLA_STATS_IDX_ATTR_MASK 0
> +
>  #endif
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index aeb2fa9b1cda..ea03b6cd3d3c 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3512,7 +3512,7 @@ static int rtnl_stats_get(struct sk_buff *skb, struct 
> nlmsghdr *nlh)
>   struct if_stats_msg *ifsm;
>   struct net_device *dev = NULL;
>   struct sk_buff *nskb;
> - u32 filter_mask;
> + u32 filter_mask, lidx_filter;
>   int lidx = 0;
>   int err;
>  
> @@ -3529,6 +3529,14 @@ static int rtnl_stats_get(struct sk_buff *skb, struct 
> nlmsghdr *nlh)
>   if (!filter_mask)
>   return -EINVAL;
>  
> + /* only one attribute which can save a local idx is allowed at a time
> +  * even though rtnl_stats_get doesn't save the lidx, we need to be
> +  * consistent with the dump side and error out
> +  */
> + lidx_filter = filter_mask & IFLA_STATS_IDX_ATTR_MASK;
> + if (lidx_filter && !is_power_of_2(lidx_filter))
> + return -EINVAL;
> +
>   nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
>   if (!nskb)
>   return -ENOBUFS;
> @@ -3556,7 +3564,7 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct 
> netlink_callback *cb)
>   struct net_device *dev;
>   struct hlist_head *head;
>   unsigned int flags = NLM_F_MULTI;
> - u32 filter_mask = 0;
> + u32 filter_mask = 0, lidx_filter;
>   int err;
>  
>   s_h = cb->args[0];
> @@ -3570,6 +3578,11 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct 
> netlink_callback *cb)
>   if (!filter_mask)
>   return -EINVAL;
>  
> + /* only one attribute which can save a local idx is allowed at a time */
> + lidx_filter = filter_mask & IFLA_STATS_IDX_ATTR_MASK;
> + if (lidx_filter && !is_power_of_2(lidx_filter))
> + return -EINVAL;
> +
>   
Instead of introducing the restriction at this level, is it possible to use two
args for this, like below, and avoid the restriction?
cb->args[2] = current filter being processed
cb->args[3] = private filter idx (your lidx)

Re: iproute2: bash completion function for tc

2016-04-27 Thread Stephen Hemminger

On Wed, 27 Apr 2016 20:19:26 -0700
Alexei Starovoitov  wrote:

> On Tue, Apr 26, 2016 at 09:28:17AM +0200, Quentin Monnet wrote:
> > Hi Jamal, Stephen,
> > 
> > I searched for a function providing auto-completion for `tc` utility in
> > bash, but I found none. So I have created one, and I would like to share it
> > with the community. It is available here:
> > https://github.com/6WIND/tc_bash-completion/blob/master/tc
> > I would like to make it easily available to tc users, so here is a
> > twofold request:
> > 
> > * I do not know where to submit the code. Should I submit here on netdev
> > for inclusion in iproute2 package, or rather to the bash-completion
> > repository on GitHub? I feel like it would receive better feedback and
> > updates if pushed to iproute2. Could you please provide some advice here?
> > * The completion for `tc` seems to work well; I have tested it with many
> > commands, but I am no tc expert, and there are probably some cases where
> > the completion fails to propose the correct choices. I would be really
> > interested in any feedback/bug reports that you, or anyone on this list
> > who uses tc, could provide.
> 
> that looks very interesting.
> I think making it a part of iproute2 is a good thing.
> How about installing it into /etc/iproute2/ ?
> Stephen, any comments?
> 

I am ok with keeping it in the repository.
But it would need to be installed in the standard bash directory,
is that distro dependent?

Re: [PATCH v2 net-next 2/2] tcp: remove SKBTX_ACK_TSTAMP since it is redundant

2016-04-27 Thread Eric Dumazet

On Wed, 2016-04-27 at 23:39 -0400, Soheil Hassas Yeganeh wrote:
> From: Soheil Hassas Yeganeh 
> 
> The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when
> the timestamp of the TCP acknowledgement should be reported on
> error queue. Since accessing skb_shinfo is likely to incur a
> cache-line miss at the time of receiving the ack, the
> txstamp_ack bit was added in tcp_skb_cb, which is set iff
> the SKBTX_ACK_TSTAMP flag is set for an skb. This makes
> SKBTX_ACK_TSTAMP flag redundant.
> 
> Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit
> everywhere.
> 
> Note that this frees one bit in shinfo->tx_flags.
> 
> Signed-off-by: Soheil Hassas Yeganeh 
> Acked-by: Martin KaFai Lau 
> Suggested-by: Willem de Bruijn 
> ---

Acked-by: Eric Dumazet

Re: [PATCH v2 net-next 1/2] tcp: remove an unnecessary check in tcp_tx_timestamp

2016-04-27 Thread Eric Dumazet

On Wed, 2016-04-27 at 23:39 -0400, Soheil Hassas Yeganeh wrote:
> From: Soheil Hassas Yeganeh 
> 
> Remove the redundant check for sk->sk_tsflags in tcp_tx_timestamp.
> 
> tcp_tx_timestamp() receives the tsflags as a parameter. As a
> result the "sk->sk_tsflags || tsflags" is redundant, since
> tsflags already includes sk->sk_tsflags plus overrides from
> control messages.
> 
> Signed-off-by: Soheil Hassas Yeganeh 
> ---

Acked-by: Eric Dumazet

Re: [RFC PATCH 4/5] bnxt: Add support for segmentation of tunnels with outer checksums

2016-04-27 Thread Michael Chan

On Wed, Apr 27, 2016 at 8:21 AM, Alexander Duyck
 wrote:
> On Tue, Apr 26, 2016 at 10:55 PM, Michael Chan
>  wrote:
>> On Tue, Apr 19, 2016 at 12:06 PM, Alexander Duyck  
>> wrote:
>>> This patch assumes that the bnxt hardware will ignore existing IPv4/v6
>>> header fields for length and checksum as well as the length and checksum
>>> fields for outer UDP and GRE headers.
>>>
>>> I have no means of testing this as I do not have any bnxt hardware but
>>> thought I would submit it as an RFC to see if anyone out there wants to
>>> test this and see if this does in fact enable this functionality allowing
>>> us to segment tunneled frames that have an outer checksum.
>>>
>>> Signed-off-by: Alexander Duyck 
>>
>> Hi Alex, I just did a very quick test of this patch on our bnxt
>> hardware and it seemed to work.
>>
>> I created a vxlan endpoint with udpcsum enabled and I saw TSO packets
>> getting through.  I've verified that our hardware can be programmed to
>> either ignore outer UDP checksum or to calculate it.  Current default
>> is to ignore ipv4 UDP checksum and calculate ipv6 UDP checksum.
>> Thanks.
>
> Are you saying you can natively support UDP tunnel with outer checksum
> offload then?

Yes.  Calculate or ignore the outer UDP checksum.

>
> I'm just trying to sort out if you actually need to have the partial
> segmentation offload support or if we can handle it in hardware.  Also
> is there any documentation you could point me to that might help to
> clarify what the hardware does/doesn't support so that I could improve
> upon this patch in order to make sure we are getting the most bang for
> the buck in terms of the features that can be offloaded by hardware?

No public documentation yet.  I think the plan is to publish the
programmer's reference on our website at some point in the future.

[PATCH v2 net-next 2/2] tcp: remove SKBTX_ACK_TSTAMP since it is redundant

2016-04-27 Thread Soheil Hassas Yeganeh

From: Soheil Hassas Yeganeh 

The SKBTX_ACK_TSTAMP flag is set in skb_shinfo->tx_flags when
the timestamp of the TCP acknowledgement should be reported on
error queue. Since accessing skb_shinfo is likely to incur a
cache-line miss at the time of receiving the ack, the
txstamp_ack bit was added in tcp_skb_cb, which is set iff
the SKBTX_ACK_TSTAMP flag is set for an skb. This makes
SKBTX_ACK_TSTAMP flag redundant.

Remove the SKBTX_ACK_TSTAMP and instead use the txstamp_ack bit
everywhere.

Note that this frees one bit in shinfo->tx_flags.

Signed-off-by: Soheil Hassas Yeganeh 
Acked-by: Martin KaFai Lau 
Suggested-by: Willem de Bruijn 
---
 include/linux/skbuff.h |  6 +-
 net/ipv4/tcp.c |  5 +++--
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_output.c  | 17 +++--
 net/socket.c   |  3 ---
 5 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index da0ace3..ae30555 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -382,14 +382,10 @@ enum {
 
/* generate software time stamp when entering packet scheduling */
SKBTX_SCHED_TSTAMP = 1 << 6,
-
-   /* generate software timestamp on peer data acknowledgment */
-   SKBTX_ACK_TSTAMP = 1 << 7,
 };
 
 #define SKBTX_ANY_SW_TSTAMP(SKBTX_SW_TSTAMP| \
-SKBTX_SCHED_TSTAMP | \
-SKBTX_ACK_TSTAMP)
+SKBTX_SCHED_TSTAMP)
 #define SKBTX_ANY_TSTAMP   (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
 
 /*
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3c542dc..8e05eb6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -435,9 +435,10 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, 
struct sk_buff *skb)
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
-   if (shinfo->tx_flags & SKBTX_ANY_TSTAMP)
+   if (tsflags & SOF_TIMESTAMPING_TX_ACK)
+   tcb->txstamp_ack = 1;
+   if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
-   tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP);
}
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 967520d..2f3fd92 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3087,8 +3087,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct 
sk_buff *skb,
return;
 
shinfo = skb_shinfo(skb);
-   if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-   !before(shinfo->tskey, prior_snd_una) &&
+   if (!before(shinfo->tskey, prior_snd_una) &&
before(shinfo->tskey, tcp_sk(sk)->snd_una))
__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9d3b4b3..ace183c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -,11 +,17 @@ static void tcp_adjust_pcount(struct sock *sk, const 
struct sk_buff *skb, int de
tcp_verify_left_out(tp);
 }
 
+static bool tcp_has_tx_tstamp(const struct sk_buff *skb)
+{
+   return TCP_SKB_CB(skb)->txstamp_ack ||
+   (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP);
+}
+
 static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
 {
struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-   if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
+   if (unlikely(tcp_has_tx_tstamp(skb)) &&
!before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
@@ -2446,13 +2452,12 @@ u32 __tcp_select_window(struct sock *sk)
 void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 const struct sk_buff *next_skb)
 {
-   const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb);
-   u8 tsflags = next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
-
-   if (unlikely(tsflags)) {
+   if (unlikely(tcp_has_tx_tstamp(next_skb))) {
+   const struct skb_shared_info *next_shinfo =
+   skb_shinfo(next_skb);
struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-   shinfo->tx_flags |= tsflags;
+   shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP;
shinfo->tskey = next_shinfo->tskey;
TCP_SKB_CB(skb)->txstamp_ack |=
TCP_SKB_CB(next_skb)->txstamp_ack;
diff --git a/net/socket.c b/net/socket.c
index 5dbb0bb..7789d79 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,9 +600,6 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
flags |= SKBTX_SCHED_TSTAMP;
 
-   if (tsflags & SOF_TIMESTAMPING_TX_ACK)
-

[PATCH v2 net-next 1/2] tcp: remove an unnecessary check in tcp_tx_timestamp

2016-04-27 Thread Soheil Hassas Yeganeh

From: Soheil Hassas Yeganeh 

Remove the redundant check for sk->sk_tsflags in tcp_tx_timestamp.

tcp_tx_timestamp() receives the tsflags as a parameter. As a
result the "sk->sk_tsflags || tsflags" is redundant, since
tsflags already includes sk->sk_tsflags plus overrides from
control messages.

Signed-off-by: Soheil Hassas Yeganeh 
---
 net/ipv4/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d73858..3c542dc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -430,7 +430,7 @@ EXPORT_SYMBOL(tcp_init_sock);
 
 static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
 {
-   if (sk->sk_tsflags || tsflags) {
+   if (tsflags) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
-- 
2.8.0.rc3.226.g39d4020

[PATCH v2 net-next 0/2] tcp: simplify ack tx timestamps

2016-04-27 Thread Soheil Hassas Yeganeh

From: Soheil Hassas Yeganeh 

v2:
- Fully remove SKBTX_ACK_TSTAMP, as suggested by Willem de Bruijn.

This patch series aims at removing redundant checks and fields
for ack timestamps for TCP.

Soheil Hassas Yeganeh (2):
  tcp: remove an unnecessary check in tcp_tx_timestamp
  tcp: remove SKBTX_ACK_TSTAMP since it is redundant

 include/linux/skbuff.h |  6 +-
 net/ipv4/tcp.c |  7 ---
 net/ipv4/tcp_input.c   |  3 +--
 net/ipv4/tcp_output.c  | 17 +++--
 net/socket.c   |  3 ---
 5 files changed, 17 insertions(+), 19 deletions(-)

-- 
2.8.0.rc3.226.g39d4020

Re: iproute2: bash completion function for tc

2016-04-27 Thread Alexei Starovoitov

On Tue, Apr 26, 2016 at 09:28:17AM +0200, Quentin Monnet wrote:
> Hi Jamal, Stephen,
> 
> I searched for a function providing auto-completion for `tc` utility in
> bash, but I found none. So I have created one, and I would like to share it
> with the community. It is available here:
> https://github.com/6WIND/tc_bash-completion/blob/master/tc
> I would like to make it easily available to tc users, so here is a
> twofold request:
> 
> * I do not know where to submit the code. Should I submit here on netdev
> for inclusion in iproute2 package, or rather to the bash-completion
> repository on GitHub? I feel like it would receive better feedback and
> updates if pushed to iproute2. Could you please provide some advice here?
> * The completion for `tc` seems to work well; I have tested it with many
> commands, but I am no tc expert, and there are probably some cases where
> the completion fails to propose the correct choices. I would be really
> interested in any feedback/bug reports that you, or anyone on this list
> who uses tc, could provide.

that looks very interesting.
I think making it a part of iproute2 is a good thing.
How about installing it into /etc/iproute2/ ?
Stephen, any comments?

Re: [PATCH net-next 0/2] net: avoid some atomic ops when FASYNC is not used

2016-04-27 Thread David Miller

From: Eric Dumazet 
Date: Mon, 25 Apr 2016 10:39:31 -0700

> We can avoid some atomic operations on sockets not using FASYNC

I guess a user can do weird things and set/clear the FASYNC bit in the
middle of the SOCKWQ_ASYNC_ bit being set, and reset the FASYNC bit
later and the SOCKWQ_* state is stale.

However, that's probably not worth handling explicitly.

Series applied, thanks.

Re: [net-next PATCH V3 0/5] samples/bpf: Improve user experience

2016-04-27 Thread David Miller

From: Jesper Dangaard Brouer 
Date: Wed, 27 Apr 2016 09:30:08 +0200

> It is a steep learning curve getting started with using the eBPF
> examples in samples/bpf/.  There are several dependencies, and
> specific versions of these dependencies.  Invoking make in the correct
> manner is also slightly obscure.
> 
> This patchset cleans up, documents and hopefully improves the first time
> user experience with the eBPF samples directory by auto-detecting
> certain scenarios.
> 
> V3:
>  - Add Alexei's ACKs
>  - Remove README paragraph about LLVM experimental BPF target
>as it only existed between LLVM version 3.6 to 3.7.
> 
> V2:
>  - Adjusted recommended minimum versions to 3.7.1
>  - Included clang build instructions
>  - New patch adding CLANG variable and validation of command

Please respin addressing Naveen's feedback, thanks.

Re: [PATCH net-next 00/17] net: snmp: update SNMP methods

2016-04-27 Thread David Miller

From: Eric Dumazet 
Date: Wed, 27 Apr 2016 16:44:26 -0700

> In the old days (before linux-3.0), SNMP counters were duplicated,
> one set for user context, and another one for BH context.
> 
> After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
> we have a single copy, and what really matters is preemption being
> enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
> respectively.
> 
> This patch series kills the obsolete STATS_USER() helpers,
> and renames all XXX_BH() helpers to __XXX() ones, to more
> closely match conventions used to update per cpu variables.
> 
> This is probably going to hurt maintainers' job for a while,
> since cherry-picks will not be clean, but this had to be
> cleaned at one point. I am so sorry guys.

Looks good to me, series applied, thanks Eric.

Re: [net-next v2 00/14][pull request] 40GbE Intel Wired LAN Driver Updates 2016-04-27

2016-04-27 Thread David Miller

From: Jeff Kirsher 
Date: Wed, 27 Apr 2016 13:15:39 -0700

> This series contains updates to i40e and i40evf.

Pulled, thanks Jeff.

[PATCH net 3/3] samples/bpf: fix trace_output example

2016-04-27 Thread Alexei Starovoitov

llvm cannot always recognize memset as builtin function and optimize
it away, so just delete it. It was a leftover from testing
of bpf_perf_event_output() with large data structures.

Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example")
Signed-off-by: Alexei Starovoitov 
---
 samples/bpf/trace_output_kern.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 8d8d1ec429eb..9b96f4fb8cea 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx)
u64 cookie;
} data;
 
-   memset(&data, 0, sizeof(data));
data.pid = bpf_get_current_pid_tgid();
data.cookie = 0x12345678;
 
-- 
2.8.0

[PATCH net 1/3] bpf: fix refcnt overflow

2016-04-27 Thread Alexei Starovoitov

On a system with >32Gbyte of physical memory and infinite RLIMIT_MEMLOCK,
a malicious application may overflow the 32-bit bpf program refcnt.
It's also possible to overflow map refcnt on 1Tb system.
Impose 32k hard limit which means that the same bpf program or
map cannot be shared by more than 32k processes.

Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs")
Reported-by: Jann Horn 
Signed-off-by: Alexei Starovoitov 
Acked-by: Daniel Borkmann 
---
 include/linux/bpf.h   |  3 ++-
 kernel/bpf/inode.c|  7 ---
 kernel/bpf/syscall.c  | 24 
 kernel/bpf/verifier.c | 11 +++
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 21ee41b92e8a..f1d5c5acc8dd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -171,12 +171,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list 
*tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_put_rcu(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
-void bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index f2ece3c174a5..8f94ca1860cf 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
 {
switch (type) {
case BPF_TYPE_PROG:
-   atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
+   raw = bpf_prog_inc(raw);
break;
case BPF_TYPE_MAP:
-   bpf_map_inc(raw, true);
+   raw = bpf_map_inc(raw, true);
break;
default:
WARN_ON_ONCE(1);
@@ -297,7 +297,8 @@ static void *bpf_obj_do_get(const struct filename *pathname,
goto out;
 
raw = bpf_any_get(inode->i_private, *type);
-   touch_atime(&path);
+   if (!IS_ERR(raw))
+   touch_atime(&path);
 
path_put(&path);
return raw;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index adc5e4bd74f8..cf5e9f7ad13a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -218,11 +218,18 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
 }
 
-void bpf_map_inc(struct bpf_map *map, bool uref)
+/* prog's and map's refcnt limit */
+#define BPF_MAX_REFCNT 32768
+
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 {
-   atomic_inc(&map->refcnt);
+   if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
+   atomic_dec(&map->refcnt);
+   return ERR_PTR(-EBUSY);
+   }
if (uref)
atomic_inc(&map->usercnt);
+   return map;
 }
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
@@ -234,7 +241,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
if (IS_ERR(map))
return map;
 
-   bpf_map_inc(map, true);
+   map = bpf_map_inc(map, true);
fdput(f);
 
return map;
@@ -658,6 +665,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f)
return f.file->private_data;
 }
 
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+   if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
+   atomic_dec(&prog->aux->refcnt);
+   return ERR_PTR(-EBUSY);
+   }
+   return prog;
+}
+
 /* called by sockets/tracing/seccomp before attaching program to an event
  * pairs with bpf_prog_put()
  */
@@ -670,7 +686,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
if (IS_ERR(prog))
return prog;
 
-   atomic_inc(&prog->aux->refcnt);
+   prog = bpf_prog_inc(prog);
fdput(f);
 
return prog;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index db2574e7b8b0..89bcaa0966da 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2049,15 +2049,18 @@ static int replace_map_fd_with_map_ptr(struct 
verifier_env *env)
return -E2BIG;
}
 
-   /* remember this map */
-   env->used_maps[env->used_map_cnt++] = map;
-
/* hold the map. If the program is rejected by verifier,
 * the map will be released by release_maps() or it
 * will be used by the valid program until it's unloaded
 * and all maps are released in free_bpf_prog_info()
 */
-   bpf_map_inc(map, false);
+   map = bpf_map_inc(map, false);
+   if (

[PATCH net 0/3] bpf: fix several bugs

2016-04-27 Thread Alexei Starovoitov

First two patches address bugs found by Jann Horn.
Last patch is a minor samples fix spotted during the testing.

Alexei Starovoitov (3):
  bpf: fix refcnt overflow
  bpf: fix check_map_func_compatibility logic
  samples/bpf: fix trace_output example

 include/linux/bpf.h |  3 +-
 kernel/bpf/inode.c  |  7 ++--
 kernel/bpf/syscall.c| 24 ++---
 kernel/bpf/verifier.c   | 76 +
 samples/bpf/trace_output_kern.c |  1 -
 5 files changed, 73 insertions(+), 38 deletions(-)

-- 
2.8.0

[PATCH net 2/3] bpf: fix check_map_func_compatibility logic

2016-04-27 Thread Alexei Starovoitov

The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that 
get the selected hardware PMU conuter")
introduced a clever way to check bpf_helper<->map_type compatibility.
Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") 
adjusted
the logic and inadvertently broke it.
Get rid of the clever bool compare and go back to two-way check
from map and from helper perspective.

Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
Reported-by: Jann Horn 
Signed-off-by: Alexei Starovoitov 
Signed-off-by: Daniel Borkmann 
---
 kernel/bpf/verifier.c | 65 +++
 1 file changed, 40 insertions(+), 25 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 89bcaa0966da..c5c17a62f509 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -239,16 +239,6 @@ static const char * const reg_type_str[] = {
[CONST_IMM] = "imm",
 };
 
-static const struct {
-   int map_type;
-   int func_id;
-} func_limit[] = {
-   {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
-   {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
-   {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
-   {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid},
-};
-
 static void print_verifier_state(struct verifier_env *env)
 {
enum bpf_reg_type t;
@@ -921,27 +911,52 @@ static int check_func_arg(struct verifier_env *env, u32 
regno,
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 {
-   bool bool_map, bool_func;
-   int i;
-
if (!map)
return 0;
 
-   for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
-   bool_map = (map->map_type == func_limit[i].map_type);
-   bool_func = (func_id == func_limit[i].func_id);
-   /* only when map & func pair match it can continue.
-* don't allow any other map type to be passed into
-* the special func;
-*/
-   if (bool_func && bool_map != bool_func) {
-   verbose("cannot pass map_type %d into func %d\n",
-   map->map_type, func_id);
-   return -EINVAL;
-   }
+   /* We need a two way check, first is from map perspective ... */
+   switch (map->map_type) {
+   case BPF_MAP_TYPE_PROG_ARRAY:
+   if (func_id != BPF_FUNC_tail_call)
+   goto error;
+   break;
+   case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+   if (func_id != BPF_FUNC_perf_event_read &&
+   func_id != BPF_FUNC_perf_event_output)
+   goto error;
+   break;
+   case BPF_MAP_TYPE_STACK_TRACE:
+   if (func_id != BPF_FUNC_get_stackid)
+   goto error;
+   break;
+   default:
+   break;
+   }
+
+   /* ... and second from the function itself. */
+   switch (func_id) {
+   case BPF_FUNC_tail_call:
+   if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+   goto error;
+   break;
+   case BPF_FUNC_perf_event_read:
+   case BPF_FUNC_perf_event_output:
+   if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+   goto error;
+   break;
+   case BPF_FUNC_get_stackid:
+   if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
+   goto error;
+   break;
+   default:
+   break;
}
 
return 0;
+error:
+   verbose("cannot pass map_type %d into func %d\n",
+   map->map_type, func_id);
+   return -EINVAL;
 }
 
 static int check_call(struct verifier_env *env, int func_id)
-- 
2.8.0

Re: [PATCH v2 net-next 11/13] Documentation: Bindings: Update DT binding for separating dsaf dev support

2016-04-27 Thread Yisen Zhuang

Hi Rob,

Thanks for your comments.

在 2016/4/27 23:25, Rob Herring 写道:
> On Tue, Apr 26, 2016 at 10:33 PM, Yisen Zhuang  
> wrote:
>> Hi Rob and David,
>>
>> Please see my comments inline.
>>
>> David have merged this series to net-next, but we need to modify some codes 
>> according
>> to Rob's comments. I am not sure if i need to send V3 for this series, or 
>> separate
>> patches of documentation to independent series and generate a new patch for 
>> hns base
>> on current net-next?
> 
> That's David's call. I'm guessing he wants follow-up patches on top of these.

Okay, I will send a new series based on current net-next.

> 
>> 在 2016/4/26 20:48, Rob Herring 写道:
>>> On Sat, Apr 23, 2016 at 05:05:15PM +0800, Yisen Zhuang wrote:
 Because debug dsaf port was separated from service dsaf port, this patch
 updates the related information of DT binding.
>>>
>>> Separated when? New version of the h/w? If so, where's the new
>>> compatible string? This is quite a big binding change.
>>
>> There isn't any change of h/w. I separated debug dsaf port from service dsaf
>> port to make the code simpler and more readable.
> 
> Okay.
> 
> [...]
> 
 +  serdes-syscon rather than this address.
The third region is the PPE register base and size.
 -  The fourth region is dsa fabric base register and size.
 -  The fifth region is cpld base register and size, it is not required if 
 do not use cpld.
 -- phy-handle: phy handle of physicl port, 0 if not any phy device. see 
 ethernet.txt [1].
 +  The fourth region is dsa fabric base register and size. It is not 
 required for
 +  single-port mode.
 +- reg-names: may be ppe-base and(or) dsaf-base. It is used to find the
 +  corresponding reg's index.
>>>
>>> But you have up to 5 regions.
>>>
>>> The variable nature of what regions you have tells me you need more
>>> specific compatible strings for each chip.
>>
>> we didn't add support of new h/w. We added these regions to make code simple 
>> and readability.
>> If we need to add support of next h/w version next time, we don't need to 
>> add many branches
>> for these attributes. So we didn't add a new compatible here.
> 
> Not sure what you mean by branches. It's fine to put properties for
> things that vary among h/w versions, but new compatible strings will
> be needed for any new versions.

I mean that we put properties for things that vary among h/w versions. If we 
add support for
new h/w versions next time, we will add new compatible strings.

> 
> 
 +- port: subnodes of dsaf. A dsaf node may contain several port 
 nodes(Depending
 +  on mode of dsaf). Port node contain some attributes listed below:
 +- port-id: is physical port index in one dsaf.
>>>
>>> Indexes should generally be avoided. What does the number correspond
>>> to in h/w (if anything)?
>>
>> port-id is the index of a port in dsaf; it corresponds to the index of the PHY shown
>> below.
> 
> Okay, you should use reg property here instead.

Agree, thanks.

> 
>>
>>  CPU
>>   |
>> ---
>> | |   |
>> ---   -
>> | |  ||   |   |   |   |
>> |PPE ||  PPE  |   |  PPE  |
>> | |  ||   |   |   |   |   |
>> | |  ||   |   |   |   |   |
>> |  crossbar  ||   |   |   |   |   |
>> | |  ||   |   |   |   |   |
>> |   --   ||   |   |   |   |   |
>> |   | | |  |  |  |   ||   |   |   |   |   |
>> |   | | |  |  |  |   ||   |   |   |   |   |
>> |  MAC   MAC   MACMACMACMAC  ||  MAC  |   |  MAC  |
>> |   | | |  |  |  |   ||   |   |   |   |   |
>> ---   -
>> | | |  |  |  |\/  |/  |
>>PHY   PHY   PHYPHYPHYPHY\  /  PHY  /  PHY
>> \/   /
>>  \  /   /
>>  DSAF(three platform device)
>>
>>>
 +- phy-handle: phy handle of physicl port. It is not required if there 
 isn't
> 
> Another typo here.

Agree, thanks.

> 
> Rob
> 
> .
>

[PATCH net v3 5/5] drivers: net: cpsw: use of_phy_connect() in fixed-link case

2016-04-27 Thread David Rivshin (Allworx)

From: David Rivshin 

If a fixed-link DT subnode is used, the phy_device was looked up so
that a PHY ID string could be constructed and passed to phy_connect().
This is not necessary, as the device_node can be passed directly to
of_phy_connect() instead. This reuses the same codepath as if the
phy-handle DT property was used.

Signed-off-by: David Rivshin 
Tested-by: Nicolas Chauvet 
Tested-by: Andrew Goodbody 
Reviewed-by: Mugunthan V N 
Reviewed-by: Grygorii Strashko 
---

Changes since v2 [1]:
- Added Tested-by from Andrew Goodbody [3]
- Added Reviewed-by from Mugunthan V N [4]
- Added Reviewed-by from Grygorii Strashko [5]

Changes since v1 [2]:
- Rebased (trivial conflict, e5a03bfd modified the deleted snprintf)
- Added Tested-by from Nicolas Chauvet

[1] http://patchwork.ozlabs.org/patch/613276/
[2] http://patchwork.ozlabs.org/patch/560327/
[3] https://lkml.org/lkml/2016/4/22/537
[4] https://lkml.org/lkml/2016/4/22/63
[5] https://lkml.org/lkml/2016/4/22/529


 drivers/net/ethernet/ti/cpsw.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 712bc6d..e2fcdf1 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2044,30 +2044,21 @@ static int cpsw_probe_dt(struct cpsw_platform_data 
*data,
"phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
if (slave_data->phy_node) {
dev_dbg(&pdev->dev,
"slave[%d] using phy-handle=\"%s\"\n",
i, slave_data->phy_node->full_name);
} else if (of_phy_is_fixed_link(slave_node)) {
-   struct device_node *phy_node;
-   struct phy_device *phy_dev;
-
/* In the case of a fixed PHY, the DT node associated
 * to the PHY is the Ethernet MAC DT node.
 */
ret = of_phy_register_fixed_link(slave_node);
if (ret)
return ret;
-   phy_node = of_node_get(slave_node);
-   phy_dev = of_phy_find_device(phy_node);
-   if (!phy_dev)
-   return -ENODEV;
-   snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
-PHY_ID_FMT, phy_dev->mdio.bus->id,
-phy_dev->mdio.addr);
+   slave_data->phy_node = of_node_get(slave_node);
} else if (parp) {
u32 phyid;
struct device_node *mdio_node;
struct platform_device *mdio;
 
if (lenp != (sizeof(__be32) * 2)) {
dev_err(&pdev->dev, "Invalid slave[%d] phy_id 
property\n", i);
-- 
2.5.5

[PATCH net v3 4/5] dt: cpsw: phy-handle, phy_id, and fixed-link are mutually exclusive

2016-04-27 Thread David Rivshin (Allworx)

From: David Rivshin 

The phy-handle, phy_id, and fixed-link properties are mutually exclusive,
and only one need be specified. Make this clear in the binding doc.

Also mark the phy_id property as deprecated, as phy-handle should be
used instead.

Signed-off-by: David Rivshin 
---

Changes since v2 [1]:
- split from previous patch 2
- marked the phy_id property as deprecated [3]
- removed Rob Herring's Acked-by due to above change

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet
- Added Acked-by from Rob Herring for the binding change

[1] http://patchwork.ozlabs.org/patch/613260/
[2] http://patchwork.ozlabs.org/patch/560324/
[3] https://lkml.org/lkml/2016/4/22/494


 Documentation/devicetree/bindings/net/cpsw.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt 
b/Documentation/devicetree/bindings/net/cpsw.txt
index 28a4781..0ae0649 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -41,21 +41,21 @@ Optional properties:
 Slave Properties:
 Required properties:
 - phy-mode : See ethernet.txt file in the same directory
 
 Optional properties:
 - dual_emac_res_vlan   : Specifies VID to be used to segregate the ports
 - mac-address  : See ethernet.txt file in the same directory
-- phy_id   : Specifies slave phy id
+- phy_id   : Specifies slave phy id (deprecated, use phy-handle)
 - phy-handle   : See ethernet.txt file in the same directory
 
 Slave sub-nodes:
 - fixed-link   : See fixed-link.txt file in the same directory
- Either the property phy_id, or the sub-node
- fixed-link can be specified
+
+Note: Exactly one of phy_id, phy-handle, or fixed-link must be specified.
 
 Note: "ti,hwmods" field is used to fetch the base address and irq
 resources from TI, omap hwmod data base during device registration.
 Future plan is to migrate hwmod data base contents into device tree
 blob so that, all the required data will be used from device tree dts
 file.
 
-- 
2.5.5

[PATCH net v3 3/5] drivers: net: cpsw: don't ignore phy-mode if phy-handle is used

2016-04-27 Thread David Rivshin (Allworx)

From: David Rivshin 

The phy-mode emac property was only being processed in the phy_id
or fixed-link cases. However if phy-handle was specified instead,
an error message would complain about the lack of phy_id or
fixed-link, and then jump past the of_get_phy_mode(). This would
result in the PHY mode defaulting to MII, regardless of what the
devicetree specified.

Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing")
Signed-off-by: David Rivshin 
Tested-by: Nicolas Chauvet 
Tested-by: Andrew Goodbody 
Reviewed-by: Mugunthan V N 
---
I would suggest this for -stable. It should apply cleanly as far back
as 4.4.

Changes since v2 [1]:
- split from previous patch 2
- Added Tested-by from Andrew Goodbody [3]
- Added Reviewed-by from Mugunthan V N [4]
- rewrote commit log to focus on the functional bug fixed, rather
  than the bogus error message

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet
- Added Acked-by from Rob Herring for the binding change

[1] http://patchwork.ozlabs.org/patch/613260/
[2] http://patchwork.ozlabs.org/patch/560324/
[3] https://lkml.org/lkml/2016/4/22/537
[4] https://lkml.org/lkml/2016/4/22/63


 drivers/net/ethernet/ti/cpsw.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 5903448..712bc6d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2039,15 +2039,19 @@ static int cpsw_probe_dt(struct cpsw_platform_data 
*data,
/* This is no slave child node, continue */
if (strcmp(slave_node->name, "slave"))
continue;
 
slave_data->phy_node = of_parse_phandle(slave_node,
"phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
-   if (of_phy_is_fixed_link(slave_node)) {
+   if (slave_data->phy_node) {
+   dev_dbg(&pdev->dev,
+   "slave[%d] using phy-handle=\"%s\"\n",
+   i, slave_data->phy_node->full_name);
+   } else if (of_phy_is_fixed_link(slave_node)) {
struct device_node *phy_node;
struct phy_device *phy_dev;
 
/* In the case of a fixed PHY, the DT node associated
 * to the PHY is the Ethernet MAC DT node.
 */
ret = of_phy_register_fixed_link(slave_node);
@@ -2076,15 +2080,17 @@ static int cpsw_probe_dt(struct cpsw_platform_data 
*data,
if (!mdio) {
dev_err(&pdev->dev, "Missing mdio platform 
device\n");
return -EINVAL;
}
snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
 PHY_ID_FMT, mdio->name, phyid);
} else {
-   dev_err(&pdev->dev, "No slave[%d] phy_id or fixed-link 
property\n", i);
+   dev_err(&pdev->dev,
+   "No slave[%d] phy_id, phy-handle, or fixed-link 
property\n",
+   i);
goto no_phy_slave;
}
slave_data->phy_if = of_get_phy_mode(slave_node);
if (slave_data->phy_if < 0) {
dev_err(&pdev->dev, "Missing or malformed slave[%d] 
phy-mode property\n",
i);
return slave_data->phy_if;
-- 
2.5.5

[PATCH net v3 2/5] drivers: net: cpsw: fix segfault in case of bad phy-handle

2016-04-27 Thread David Rivshin (Allworx)

From: David Rivshin 

If an emac node has a phy-handle property that points to something
which is not a phy, then a segmentation fault will occur when the
interface is brought up. This is because while phy_connect() will
return ERR_PTR() on failure, of_phy_connect() will return NULL.
The common error check uses IS_ERR(), and so missed the case where
of_phy_connect() fails. The NULL pointer is then dereferenced.

Also, the common error message referenced slave->data->phy_id,
which would be empty in the case of phy-handle. Instead, use the
name of the device_node as a useful identifier. And in the phy_id
case add the error code for completeness.

Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing")
Signed-off-by: David Rivshin 
---
I would suggest this for -stable. It should apply cleanly as far back
as 4.5, although there is a trivial conflict in 4.4. I can produce a
separate patch against linux-4.4.y if preferred.

Changes since v2:
- new patch, although fixing part of previous patch 2 [1]

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet
- Added Acked-by from Rob Herring for the binding change

[1] http://patchwork.ozlabs.org/patch/613260/
[2] http://patchwork.ozlabs.org/patch/560324/


 drivers/net/ethernet/ti/cpsw.c | 37 +++--
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index ce0b0ca..5903448 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1143,33 +1143,42 @@ static void cpsw_slave_open(struct cpsw_slave *slave, 
struct cpsw_priv *priv)
 
if (priv->data.dual_emac)
cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
else
cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
-   if (slave->data->phy_node)
+   if (slave->data->phy_node) {
slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node,
 &cpsw_adjust_link, 0, slave->data->phy_if);
-   else
+   if (!slave->phy) {
+   dev_err(priv->dev, "phy \"%s\" not found on slave %d\n",
+   slave->data->phy_node->full_name,
+   slave->slave_num);
+   return;
+   }
+   } else {
slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
 &cpsw_adjust_link, slave->data->phy_if);
-   if (IS_ERR(slave->phy)) {
-   dev_err(priv->dev, "phy %s not found on slave %d\n",
-   slave->data->phy_id, slave->slave_num);
-   slave->phy = NULL;
-   } else {
-   phy_attached_info(slave->phy);
-
-   phy_start(slave->phy);
-
-   /* Configure GMII_SEL register */
-   cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface,
-slave->slave_num);
+   if (IS_ERR(slave->phy)) {
+   dev_err(priv->dev,
+   "phy \"%s\" not found on slave %d, err %ld\n",
+   slave->data->phy_id, slave->slave_num,
+   PTR_ERR(slave->phy));
+   slave->phy = NULL;
+   return;
+   }
}
+
+   phy_attached_info(slave->phy);
+
+   phy_start(slave->phy);
+
+   /* Configure GMII_SEL register */
+   cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num);
 }
 
 static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
 {
const int vlan = priv->data.default_vlan;
const int port = priv->host_port;
u32 reg;
-- 
2.5.5

[PATCH net v3 1/5] drivers: net: cpsw: fix parsing of phy-handle DT property in dual_emac config

2016-04-27 Thread David Rivshin (Allworx)

From: David Rivshin 

Commit 9e42f715264ff158478fa30eaed847f6e131366b ("drivers: net: cpsw: add
phy-handle parsing") saved the "phy-handle" phandle into a new cpsw_priv
field. However, phy connections are per-slave, so the phy_node field should
be in cpsw_slave_data rather than cpsw_priv.

This would go unnoticed in a single emac configuration. But in dual_emac
mode, the last "phy-handle" property parsed for either slave would be used
by both of them, causing them both to refer to the same phy_device.

Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing")
Signed-off-by: David Rivshin 
Tested-by: Nicolas Chauvet 
Tested-by: Andrew Goodbody 
Reviewed-by: Mugunthan V N 
Reviewed-by: Grygorii Strashko 
---
I would suggest this for -stable. It should apply cleanly as far back
as 4.4.

Changes since v2 [1]:
- Added Tested-by from Andrew Goodbody [3]
- Added Reviewed-by from Mugunthan V N [4]
- Added Reviewed-by from Grygorii Strashko [5]

Changes since v1 [2]:
- Rebased (no conflicts)
- Added Tested-by from Nicolas Chauvet

[1] http://patchwork.ozlabs.org/patch/613237/
[2] http://patchwork.ozlabs.org/patch/560326/
[3] https://lkml.org/lkml/2016/4/22/537
[4] https://lkml.org/lkml/2016/4/22/63
[5] https://lkml.org/lkml/2016/4/22/496


 drivers/net/ethernet/ti/cpsw.c | 13 ++---
 drivers/net/ethernet/ti/cpsw.h |  1 +
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index bbb77cd..ce0b0ca 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -363,15 +363,14 @@ static inline void slave_write(struct cpsw_slave *slave, 
u32 val, u32 offset)
__raw_writel(val, slave->regs + offset);
 }
 
 struct cpsw_priv {
spinlock_t  lock;
struct platform_device  *pdev;
struct net_device   *ndev;
-   struct device_node  *phy_node;
struct napi_struct  napi_rx;
struct napi_struct  napi_tx;
struct device   *dev;
struct cpsw_platform_data   data;
struct cpsw_ss_regs __iomem *regs;
struct cpsw_wr_regs __iomem *wr_regs;
u8 __iomem  *hw_stats;
@@ -1144,16 +1143,16 @@ static void cpsw_slave_open(struct cpsw_slave *slave, 
struct cpsw_priv *priv)
 
if (priv->data.dual_emac)
cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
else
cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
-   if (priv->phy_node)
-   slave->phy = of_phy_connect(priv->ndev, priv->phy_node,
+   if (slave->data->phy_node)
+   slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node,
 &cpsw_adjust_link, 0, slave->data->phy_if);
else
slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
 &cpsw_adjust_link, slave->data->phy_if);
if (IS_ERR(slave->phy)) {
dev_err(priv->dev, "phy %s not found on slave %d\n",
slave->data->phy_id, slave->slave_num);
@@ -1936,20 +1935,19 @@ static void cpsw_slave_init(struct cpsw_slave *slave, 
struct cpsw_priv *priv,
 
slave->data = data;
slave->regs = regs + slave_reg_ofs;
slave->sliver   = regs + sliver_reg_ofs;
slave->port_vlan = data->dual_emac_res_vlan;
 }
 
-static int cpsw_probe_dt(struct cpsw_priv *priv,
+static int cpsw_probe_dt(struct cpsw_platform_data *data,
 struct platform_device *pdev)
 {
struct device_node *node = pdev->dev.of_node;
struct device_node *slave_node;
-   struct cpsw_platform_data *data = &priv->data;
int i = 0, ret;
u32 prop;
 
if (!node)
return -EINVAL;
 
if (of_property_read_u32(node, "slaves", &prop)) {
@@ -2029,15 +2027,16 @@ static int cpsw_probe_dt(struct cpsw_priv *priv,
int lenp;
const __be32 *parp;
 
/* This is no slave child node, continue */
if (strcmp(slave_node->name, "slave"))
continue;
 
-   priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0);
+   slave_data->phy_node = of_parse_phandle(slave_node,
+   "phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
if (of_phy_is_fixed_link(slave_node)) {
struct device_node *phy_node;
struct phy_device *phy_dev;
 
/* In the case of a fixed PHY, the DT node associated
 * to the PHY is the Ethernet MAC DT node.
@@ -2271,15 +2270,15 @@ static int cpsw_probe(struct platform_devi

[PATCH net v3 0/5] drivers: net: cpsw: phy-handle fixes

2016-04-27 Thread David Rivshin (Allworx)

From: David Rivshin 

This series fixes a number of related issues around using phy-handle
properties in cpsw emac nodes.

Patch 1 fixes a bug if more than one slave is used, and either
slave uses the phy-handle property in the devicetree.

Patch 2 fixes a NULL pointer dereference which can occur if a
phy-handle property is used and of_phy_connect() returns NULL,
such as with a bad devicetree.

Patch 3 fixes an issue where the phy-mode property would be ignored
if a phy-handle property was used. This also fixes a bogus error
message that would be emitted.

Patch 4 makes the binding documentation more explicit that
exactly one PHY property should be used, and also marks phy_id as
deprecated.

Patch 5 cleans up the fixed-link case to work like the now-fixed
phy-handle case.

I have tested on the following hardware configurations:
 - (EVMSK) dual emac, phy_id property in both slaves
 - (EVMSK) dual emac, phy-handle property in both slaves
 - (EVMSK) a bad phy-handle property pointing to &mmc1
 - (EVMSK) phy_id property with incorrect PHY address
 - (BeagleBoneBlack) single emac, phy_id property
 - (custom) single emac, fixed-link subnode

Andrew Goodbody reported testing v2 on a board that doesn't use
dual_emac mode, but with 2 PHYs using phy-handle properties [1].

Nicolas Chauvet reported testing v2 on an HP t410 (dm8148).

Markus Brunner reported testing v1 on the following [2]:
 - emac0 with phy_id and emac1 with fixed phy
 - emac0 with phy-handle and emac1 with fixed phy
 - emac0 with fixed phy and emac1 with fixed phy

[1] https://lkml.org/lkml/2016/4/22/537
[2] http://www.spinics.net/lists/netdev/msg357890.html

David Rivshin (5):
  drivers: net: cpsw: fix parsing of phy-handle DT property in dual_emac
config
  drivers: net: cpsw: fix segfault in case of bad phy-handle
  drivers: net: cpsw: don't ignore phy-mode if phy-handle is used
  dt: cpsw: phy-handle, phy_id, and fixed-link are mutually exclusive
  drivers: net: cpsw: use of_phy_connect() in fixed-link case

 Documentation/devicetree/bindings/net/cpsw.txt |  6 +--
 drivers/net/ethernet/ti/cpsw.c | 69 ++
 drivers/net/ethernet/ti/cpsw.h |  1 +
 3 files changed, 41 insertions(+), 35 deletions(-)

-- 
2.5.5

Re: [PATCH 3.2 085/115] veth: don’t modify ip_summed; doing so treats packets with bad checksums as good.

2016-04-27 Thread Ben Greear


On 04/27/2016 05:00 PM, Hannes Frederic Sowa wrote:

Hi Ben,

On Wed, Apr 27, 2016, at 20:07, Ben Hutchings wrote:

On Wed, 2016-04-27 at 08:59 -0700, Ben Greear wrote:

On 04/26/2016 04:02 PM, Ben Hutchings wrote:


3.2.80-rc1 review patch.  If anyone has any objections, please let me know.

I would be careful about this.  It causes regressions when sending
PACKET_SOCKET buffers from user-space to veth devices.

There was a proposed upstream fix for the regression, but it has not gone
into the tree as far as I know.

http://www.spinics.net/lists/netdev/msg370436.html

[...]

OK, I'll drop this for now.


The fallout from not having this patch is, in my opinion, bigger
than the fallout from having it. This patch fixes silent data
corruption, whereas the problem Ben Greear is talking about might not
be that common a usage.

What do others think?

Bye,
Hannes



This patch from Cong Wang seems to fix the regression for me. I think it should
be added and tested in the main tree, and then both patches should be applied
to stable as a pair.

http://dmz2.candelatech.com/?p=linux-4.4.dev.y/.git;a=commitdiff;h=8153e983c0e5eba1aafe1fc296248ed2a553f1ac;hp=454b07405d694dad52e7f41af5816eed0190da8a



diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index da1ae0e..f8cc758 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1926,6 +1926,7 @@ retry:
goto out_unlock;
}

+   skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->sk_priority;
@@ -2352,6 +2353,7 @@ static int tpacket_fill_skb(struct packet_sock *po, 
struct sk_buff *skb,

ph.raw = frame;

+   skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = proto;
skb->dev = dev;
skb->priority = po->sk.sk_priority;
@@ -2776,6 +2778,7 @@ static int packet_snd(struct socket *sock, struct msghdr 
*msg, size_t len)
goto out_free;
}

+   skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->sk_priority;

Thanks,
Ben

--
Ben Greear 
Candela Technologies Inc  http://www.candelatech.com

Re: [PATCH 3.2 085/115] veth: don’t modify ip_summed; doing so treats packets with bad checksums as good.

2016-04-27 Thread Hannes Frederic Sowa

Hi Ben,

On Wed, Apr 27, 2016, at 20:07, Ben Hutchings wrote:
> On Wed, 2016-04-27 at 08:59 -0700, Ben Greear wrote:
> > On 04/26/2016 04:02 PM, Ben Hutchings wrote:
> > > 
> > > 3.2.80-rc1 review patch.  If anyone has any objections, please let me 
> > > know.
> > I would be careful about this.  It causes regressions when sending
> > PACKET_SOCKET buffers from user-space to veth devices.
> > 
> > There was a proposed upstream fix for the regression, but it has not gone
> > into the tree as far as I know.
> > 
> > http://www.spinics.net/lists/netdev/msg370436.html
> [...]
> 
> OK, I'll drop this for now.

The fallout from not having this patch is, in my opinion, bigger
than the fallout from having it. This patch fixes silent data
corruption, whereas the problem Ben Greear is talking about might not
be that common a usage.

What do others think?

Bye,
Hannes

[PATCH net-next 17/17] net: snmp: kill STATS_BH macros

2016-04-27 Thread Eric Dumazet

There is nothing related to BH in SNMP counters anymore,
since linux-3.0.

Rename helpers to use __ prefix instead of _BH prefix,
for contexts where preemption is disabled.

This more closely matches convention used to update
percpu variables.

Signed-off-by: Eric Dumazet 
---
 include/net/icmp.h  |  2 +-
 include/net/ip.h| 10 +-
 include/net/ipv6.h  | 36 ++--
 include/net/sctp/sctp.h |  6 +++---
 include/net/snmp.h  | 24 
 include/net/tcp.h   |  2 +-
 include/net/udp.h   |  8 
 net/dccp/dccp.h |  2 +-
 8 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 25edb740c648..3ef2743a8eec 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -30,7 +30,7 @@ struct icmp_err {
 
 extern const struct icmp_err icmp_err_convert[];
 #define ICMP_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.icmp_statistics, field)
-#define __ICMP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
+#define __ICMP_INC_STATS(net, field)   
__SNMP_INC_STATS((net)->mib.icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)   
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS(net, field)
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
 
diff --git a/include/net/ip.h b/include/net/ip.h
index fb3b766ca1c7..247ac82e9cf2 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -187,15 +187,15 @@ void ip_send_unicast_reply(struct sock *sk, struct 
sk_buff *skb,
   unsigned int len);
 
 #define IP_INC_STATS(net, field)   
SNMP_INC_STATS64((net)->mib.ip_statistics, field)
-#define __IP_INC_STATS(net, field) 
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
+#define __IP_INC_STATS(net, field) 
__SNMP_INC_STATS64((net)->mib.ip_statistics, field)
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
-#define __IP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_ADD_STATS(net, field, val) 
__SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
-#define __IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_UPD_PO_STATS(net, field, val) 
__SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
-#define __NET_INC_STATS(net, field)
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
+#define __NET_INC_STATS(net, field)
__SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
-#define __NET_ADD_STATS(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
+#define __NET_ADD_STATS(net, field, adnd) 
__SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
 
 u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 64ce3670d40a..415213da5be3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -121,21 +121,21 @@ struct frag_hdr {
 extern int sysctl_mld_max_msf;
 extern int sysctl_mld_qrv;
 
-#define _DEVINC(net, statname, modifier, idev, field)  \
+#define _DEVINC(net, statname, mod, idev, field)   \
 ({ \
struct inet6_dev *_idev = (idev);   \
if (likely(_idev != NULL))  \
-   SNMP_INC_STATS##modifier((_idev)->stats.statname, (field)); \
-   SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\
+   mod##SNMP_INC_STATS64((_idev)->stats.statname, (field));\
+   mod##SNMP_INC_STATS64((net)->mib.statname##_statistics, (field));\
 })
 
 /* per device counters are atomic_long_t */
-#define _DEVINCATOMIC(net, statname, modifier, idev, field)\
+#define _DEVINCATOMIC(net, statname, mod, idev, field) \
 ({ \
struct inet6_dev *_idev = (idev);   \
if (likely(_idev != NULL))  \
SNMP_INC_STATS_ATOMIC_LONG((_idev)->stats.statname##dev, 
(field)); \
-   SNMP_INC_STATS##modifier((net)->mib.statname##_statistics, (field));\
+   mod##SNMP_INC_STATS((net)->mib.statname##_statistics, (field));\
 })
 
 /* per device and per net counters are atomic_long_t */
@@ -147,40 +147,40 @@ extern int sysctl_mld_qrv;
SNMP_INC_STATS

[PATCH net-next 10/17] net: rename ICMP6_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename ICMP6_INC_STATS_BH() to __ICMP6_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h  |  2 +-
 net/dccp/ipv6.c |  8 
 net/ipv6/icmp.c | 10 +-
 net/ipv6/tcp_ipv6.c |  4 ++--
 net/ipv6/udp.c  |  4 ++--
 net/sctp/ipv6.c |  2 +-
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e93e947d04ff..a620fc56e2f5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -179,7 +179,7 @@ extern int sysctl_mld_qrv;
_DEVUPD(net, ipv6, 64_BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)  \
_DEVINCATOMIC(net, icmpv6, , idev, field)
-#define ICMP6_INC_STATS_BH(net, idev, field)   \
+#define __ICMP6_INC_STATS(net, idev, field)\
_DEVINCATOMIC(net, icmpv6, _BH, idev, field)
 
 #define ICMP6MSGOUT_INC_STATS(net, idev, field)\
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index e175b8fe1a87..323c6b595e31 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -80,8 +80,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
 
if (skb->len < offset + sizeof(*dh) ||
skb->len < offset + __dccp_basic_hdr_len(dh)) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
@@ -91,8 +91,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
inet6_iif(skb));
 
if (!sk) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6b573ebe49de..823a1fc576e3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -622,7 +622,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
np->dontfrag, &sockc_unused);
 
if (err) {
-   ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+   __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -674,7 +674,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, 
__be32 info)
return;
 
 out:
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 }
 
 /*
@@ -710,7 +710,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
 
-   ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+   __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -812,9 +812,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
return 0;
 
 csum_error:
-   ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+   __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 discard_it:
-   ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 drop_no_count:
kfree_skb(skb);
return 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 52ca8fac7429..78c45c027acc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -336,8 +336,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
skb->dev->ifindex);
 
if (!sk) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1243d22e2b1d..1ba5a74ac18f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -521,8 +521,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
   inet6_iif(skb), udptable, skb);
if (!sk) {
-   ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
-  ICMP6_MIB_INERRORS);
+   __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
return;
}
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ce46f1c7f133..0657d18a85bf 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -162,7 +162,7 @@ static void sctp_v6_err(s

[PATCH net-next 13/17] net: rename NET_{ADD|INC}_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename NET_INC_STATS_BH() to __NET_INC_STATS()
and NET_ADD_STATS_BH() to __NET_ADD_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h  |   4 +-
 include/net/tcp.h |   4 +-
 net/core/dev.c|   4 +-
 net/dccp/ipv4.c   |  10 ++---
 net/dccp/ipv6.c   |   8 ++--
 net/dccp/timer.c  |   4 +-
 net/ipv4/arp.c|   2 +-
 net/ipv4/inet_hashtables.c|   2 +-
 net/ipv4/inet_timewait_sock.c |   4 +-
 net/ipv4/ip_input.c   |   2 +-
 net/ipv4/syncookies.c |   4 +-
 net/ipv4/tcp.c|   4 +-
 net/ipv4/tcp_cdg.c|  20 -
 net/ipv4/tcp_cubic.c  |  20 -
 net/ipv4/tcp_fastopen.c   |  14 +++---
 net/ipv4/tcp_input.c  | 100 ++
 net/ipv4/tcp_ipv4.c   |  22 +-
 net/ipv4/tcp_minisocks.c  |  10 ++---
 net/ipv4/tcp_output.c |  14 +++---
 net/ipv4/tcp_recovery.c   |   4 +-
 net/ipv4/tcp_timer.c  |  22 +-
 net/ipv6/inet6_hashtables.c   |   2 +-
 net/ipv6/syncookies.c |   4 +-
 net/ipv6/tcp_ipv6.c   |  16 +++
 net/sctp/input.c  |   2 +-
 25 files changed, 153 insertions(+), 149 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 55f5de50a564..fb3b766ca1c7 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -193,9 +193,9 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
 #define __IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
-#define NET_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
+#define __NET_INC_STATS(net, field)
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
-#define NET_ADD_STATS_BH(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
+#define __NET_ADD_STATS(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
 
 u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 939ebd5320a9..ff8b4265cb2b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1743,7 +1743,7 @@ static inline __u32 cookie_init_sequence(const struct 
tcp_request_sock_ops *ops,
 __u16 *mss)
 {
tcp_synq_overflow(sk);
-   NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+   __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
return ops->cookie_init_seq(skb, mss);
 }
 #else
@@ -1852,7 +1852,7 @@ static inline void tcp_segs_in(struct tcp_sock *tp, const 
struct sk_buff *skb)
 static inline void tcp_listendrop(const struct sock *sk)
 {
atomic_inc(&((struct sock *)sk)->sk_drops);
-   NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+   __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
 }
 
 #endif /* _TCP_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 6324bc9267f7..e96a3bc2c634 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4982,8 +4982,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
netpoll_poll_unlock(have);
}
if (rc > 0)
-   NET_ADD_STATS_BH(sock_net(sk),
-LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+   __NET_ADD_STATS(sock_net(sk),
+   LINUX_MIB_BUSYPOLLRXPACKETS, rc);
local_bh_enable();
 
if (rc == LL_FLUSH_FAILED)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a9c75e79ba99..a8164272e0f4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -205,7 +205,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
 * socket here.
 */
if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
-   NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+   __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
} else {
/*
 * Still in RESPOND, just remove it silently.
@@ -273,7 +273,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 * servers this needs to be solved differently.
 */
if (sock_owned_by_user(sk))
-   NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
+   __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
if (sk->sk_state == DCCP_CLOSED)
goto out;
@@ -281,7 +281,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
dp = dccp_sk(sk);
if ((1 << sk->sk_state) & ~(DCCPF_R

[PATCH net-next 11/17] net: rename IP_ADD_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename IP_ADD_STATS_BH() to __IP_ADD_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h  | 2 +-
 net/ipv4/ip_forward.c | 2 +-
 net/ipv4/ip_input.c   | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 0be0af3017ba..0df4809bc68a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -189,7 +189,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_INC_STATS(net, field)   
SNMP_INC_STATS64((net)->mib.ip_statistics, field)
 #define __IP_INC_STATS(net, field) 
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
-#define IP_ADD_STATS_BH(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS_BH(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 42fbd59b0ba8..cbfb1808fcc4 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -66,7 +66,7 @@ static int ip_forward_finish(struct net *net, struct sock 
*sk, struct sk_buff *s
struct ip_options *opt  = &(IPCB(skb)->opt);
 
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
-   IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
+   __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index cca6729cd6ee..11f34e421270 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -439,9 +439,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, 
struct packet_type *pt,
BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + 
INET_ECN_ECT_1);
BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + 
INET_ECN_ECT_0);
BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
-   IP_ADD_STATS_BH(net,
-   IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
-   max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
+   __IP_ADD_STATS(net,
+  IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
+  max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
 
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 06/17] net: tcp: rename TCP_INC_STATS_BH

2016-04-27 Thread Eric Dumazet

Rename TCP_INC_STATS_BH() to __TCP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/tcp.h|  2 +-
 net/ipv4/tcp.c   |  2 +-
 net/ipv4/tcp_input.c |  8 
 net/ipv4/tcp_ipv4.c  | 16 
 net/ipv4/tcp_minisocks.c |  4 ++--
 net/ipv4/tcp_output.c|  4 ++--
 net/ipv6/tcp_ipv6.c  | 14 +++---
 7 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cfe15f712164..939ebd5320a9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -332,7 +332,7 @@ bool tcp_check_oom(struct sock *sk, int shift);
 extern struct proto tcp_prot;
 
 #define TCP_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.tcp_statistics, field)
-#define TCP_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
+#define __TCP_INC_STATS(net, field)
SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
 #define TCP_DEC_STATS(net, field)  
SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
 #define TCP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 55ef55ac9e38..96833433c2c3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3091,7 +3091,7 @@ void tcp_done(struct sock *sk)
struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
 
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 967520dbe0bf..dad8d93262ed 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5233,7 +5233,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct 
sk_buff *skb,
if (th->syn) {
 syn_challenge:
if (syn_inerr)
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk, skb);
goto discard;
@@ -5349,7 +5349,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff 
*skb,
tcp_data_snd_check(sk);
return;
} else { /* Header too small */
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
} else {
@@ -5456,8 +5456,8 @@ step5:
return;
 
 csum_error:
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 
 discard:
tcp_drop(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ebd8f3b9e61b..378e92d41c6c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -697,8 +697,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct 
sk_buff *skb)
  ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  &arg, arg.iov[0].iov_len);
 
-   TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
-   TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
+   __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+   __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
 
 #ifdef CONFIG_TCP_MD5SIG
 out:
@@ -779,7 +779,7 @@ static void tcp_v4_send_ack(struct net *net,
  ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  &arg, arg.iov[0].iov_len);
 
-   TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+   __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 }
 
 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1432,8 +1432,8 @@ discard:
return 0;
 
 csum_err:
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
-   TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+   __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
@@ -1547,7 +1547,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
goto discard_it;
 
/* Count it even if it's bad */
-   TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
+   __TCP_INC_STATS(net, TCP_MIB_INSEGS);
 
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
@@ -1679,9 +1679,9 @@ no_tcp_socket:
 
if (tcp_checksum_complete(skb)) {
 csum_error:
-   TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
+   __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
 bad_packet:
-   TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+   _

[PATCH net-next 02/17] dccp: rename DCCP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename DCCP_INC_STATS_BH() to __DCCP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 net/dccp/dccp.h  | 6 +++---
 net/dccp/input.c | 2 +-
 net/dccp/ipv4.c  | 8 
 net/dccp/ipv6.c  | 8 
 net/dccp/minisocks.c | 2 +-
 net/dccp/options.c   | 2 +-
 net/dccp/timer.c | 4 ++--
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b0e28d24e1a7..a4c6e2fed91c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -198,9 +198,9 @@ struct dccp_mib {
 };
 
 DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
-#define DCCP_INC_STATS(field)  SNMP_INC_STATS(dccp_statistics, field)
-#define DCCP_INC_STATS_BH(field)SNMP_INC_STATS_BH(dccp_statistics, field)
-#define DCCP_DEC_STATS(field)  SNMP_DEC_STATS(dccp_statistics, field)
+#define DCCP_INC_STATS(field)  SNMP_INC_STATS(dccp_statistics, field)
+#define __DCCP_INC_STATS(field)SNMP_INC_STATS_BH(dccp_statistics, 
field)
+#define DCCP_DEC_STATS(field)  SNMP_DEC_STATS(dccp_statistics, field)
 
 /*
  * Checksumming routines
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 3bd14e885396..2437ecc13b82 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -359,7 +359,7 @@ send_sync:
goto discard;
}
 
-   DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
+   __DCCP_INC_STATS(DCCP_MIB_INERRS);
 discard:
__kfree_skb(skb);
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f6d183f8f332..4b78067669d6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -318,7 +318,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
case DCCP_REQUESTING:
case DCCP_RESPOND:
if (!sock_owned_by_user(sk)) {
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
 
sk->sk_error_report(sk);
@@ -533,8 +533,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
bh_unlock_sock(ctl_sk);
 
if (net_xmit_eval(err) == 0) {
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
}
 out:
 dst_release(dst);
@@ -637,7 +637,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff 
*skb)
 drop_and_free:
reqsk_free(req);
 drop:
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
return -1;
 }
 EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 8ceb3cebcad4..e175b8fe1a87 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -156,7 +156,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct 
inet6_skb_parm *opt,
case DCCP_RESPOND:  /* Cannot happen.
   It can, it SYNs are crossed. --ANK */
if (!sock_owned_by_user(sk)) {
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
/*
 * Wake people up to see the error
@@ -277,8 +277,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, 
struct sk_buff *rxskb)
if (!IS_ERR(dst)) {
skb_dst_set(skb, dst);
ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
-   DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+   __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
return;
}
 
@@ -378,7 +378,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct 
sk_buff *skb)
 drop_and_free:
reqsk_free(req);
 drop:
-   DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+   __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
return -1;
 }
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 1994f8af646b..53eddf99e4f6 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -127,7 +127,7 @@ struct sock *dccp_create_openreq_child(const struct sock 
*sk,
}
dccp_init_xmit_timers(newsk);
 
-   DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
+   __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS);
}
return newsk;
 }
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 9bce31886bda..b82b7ee9a1d2 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -253,7 +253,7 @@ out_nonsensical_length:
return 0;
 
 out_invalid_option:
-   DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
+   __DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
rc = DCCP_RESET_CODE_OPTION_ERROR;
 out_featneg_failed:
DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
diff --git a/net/dccp/t

[PATCH net-next 07/17] net: icmp: rename ICMPMSGIN_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Remove misleading _BH suffix.

Signed-off-by: Eric Dumazet 
---
 include/net/icmp.h | 2 +-
 net/ipv4/icmp.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 5a60ce819078..25edb740c648 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -32,7 +32,7 @@ extern const struct icmp_err icmp_err_convert[];
 #define ICMP_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.icmp_statistics, field)
 #define __ICMP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)   
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256)
-#define ICMPMSGIN_INC_STATS_BH(net, field) 
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
+#define ICMPMSGIN_INC_STATS(net, field)
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
 
 struct dst_entry;
 struct net_proto_family;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 995fef9c5099..38abe70e595f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1006,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
 
icmph = icmp_hdr(skb);
 
-   ICMPMSGIN_INC_STATS_BH(net, icmph->type);
+   ICMPMSGIN_INC_STATS(net, icmph->type);
/*
 *  18 is the highest 'known' ICMP type. Anything else is a mystery
 *
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 16/17] ipv6: kill ICMP6MSGIN_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

IPv6 ICMP stats are atomics anyway.

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h | 4 +---
 net/ipv6/icmp.c| 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9f3b53f2819b..64ce3670d40a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -184,9 +184,7 @@ extern int sysctl_mld_qrv;
 
 #define ICMP6MSGOUT_INC_STATS(net, idev, field)\
_DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256)
-#define ICMP6MSGOUT_INC_STATS_BH(net, idev, field) \
-   _DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field +256)
-#define ICMP6MSGIN_INC_STATS_BH(net, idev, field)  \
+#define ICMP6MSGIN_INC_STATS(net, idev, field) \
_DEVINC_ATOMIC_ATOMIC(net, icmpv6msg, idev, field)
 
 struct ip6_ra_chain {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 823a1fc576e3..23b9a4cc418e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -728,7 +728,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 
type = hdr->icmp6_type;
 
-   ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type);
+   ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 
switch (type) {
case ICMPV6_ECHO_REQUEST:
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 14/17] ipv6: rename IP6_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename IP6_INC_STATS_BH() to __IP6_INC_STATS()
and IP6_ADD_STATS_BH() to __IP6_ADD_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h |  4 +--
 net/bridge/br_netfilter_ipv6.c | 10 +++
 net/ipv6/exthdrs.c | 66 +-
 net/ipv6/ip6_input.c   | 28 +-
 net/ipv6/ip6_output.c  | 34 +++---
 net/ipv6/ip6mr.c   |  8 ++---
 net/ipv6/reassembly.c  | 32 ++--
 7 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index a620fc56e2f5..aba8760dd108 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -167,11 +167,11 @@ extern int sysctl_mld_qrv;
 
 #define IP6_INC_STATS(net, idev,field) \
_DEVINC(net, ipv6, 64, idev, field)
-#define IP6_INC_STATS_BH(net, idev,field)  \
+#define __IP6_INC_STATS(net, idev,field)   \
_DEVINC(net, ipv6, 64_BH, idev, field)
 #define IP6_ADD_STATS(net, idev,field,val) \
_DEVADD(net, ipv6, 64, idev, field, val)
-#define IP6_ADD_STATS_BH(net, idev,field,val)  \
+#define __IP6_ADD_STATS(net, idev,field,val)   \
_DEVADD(net, ipv6, 64_BH, idev, field, val)
 #define IP6_UPD_PO_STATS(net, idev,field,val)   \
_DEVUPD(net, ipv6, 64, idev, field, val)
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index d61f56efc8dc..5e59a8457e7b 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -122,13 +122,13 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
 
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
if (pkt_len + ip6h_len > skb->len) {
-   IP6_INC_STATS_BH(net, idev,
-IPSTATS_MIB_INTRUNCATEDPKTS);
+   __IP6_INC_STATS(net, idev,
+   IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
-   IP6_INC_STATS_BH(net, idev,
-IPSTATS_MIB_INDISCARDS);
+   __IP6_INC_STATS(net, idev,
+   IPSTATS_MIB_INDISCARDS);
goto drop;
}
}
@@ -142,7 +142,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
return 0;
 
 inhdr_error:
-   IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 drop:
return -1;
 }
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ea7c4d64a00a..8de5dd7aaa05 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -258,8 +258,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
 ((skb_transport_header(skb)[1] + 1) << 3 {
-   IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
-IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
+   IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -280,8 +280,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
return 1;
}
 
-   IP6_INC_STATS_BH(dev_net(dst->dev),
-ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(dev_net(dst->dev),
+   ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
return -1;
 }
 
@@ -309,8 +309,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
 ((skb_transport_header(skb)[1] + 1) << 3 {
-   IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
-IPSTATS_MIB_INHDRERRORS);
+   __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+   IPSTATS_MIB_INHDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -319,8 +319,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
skb->pkt_type != PACKET_HOST) {
-   IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
-IPSTATS_MIB_INADDRERRORS);
+   __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+   IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
}
@@ -334,8 +334,8 @@ looped_back:
 * processed by own
 */

[PATCH net-next 15/17] ipv6: rename IP6_UPD_PO_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename IP6_UPD_PO_STATS_BH() to __IP6_UPD_PO_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ipv6.h   | 2 +-
 net/ipv6/ip6_input.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index aba8760dd108..9f3b53f2819b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -175,7 +175,7 @@ extern int sysctl_mld_qrv;
_DEVADD(net, ipv6, 64_BH, idev, field, val)
 #define IP6_UPD_PO_STATS(net, idev,field,val)   \
_DEVUPD(net, ipv6, 64, idev, field, val)
-#define IP6_UPD_PO_STATS_BH(net, idev,field,val)   \
+#define __IP6_UPD_PO_STATS(net, idev,field,val)   \
_DEVUPD(net, ipv6, 64_BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)  \
_DEVINCATOMIC(net, icmpv6, , idev, field)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 218bb906c620..6ed56012005d 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -78,7 +78,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, 
struct packet_type *pt
 
idev = __in6_dev_get(skb->dev);
 
-   IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_IN, skb->len);
+   __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
 
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
@@ -297,7 +297,7 @@ int ip6_mc_input(struct sk_buff *skb)
const struct ipv6hdr *hdr;
bool deliver;
 
-   IP6_UPD_PO_STATS_BH(dev_net(skb_dst(skb)->dev),
+   __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST,
 skb->len);
 
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 09/17] net: rename IP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename IP_INC_STATS_BH() to __IP_INC_STATS(), to
better express that it is used in non-preemptible context.

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h|  2 +-
 net/bridge/br_netfilter_hooks.c |  6 +++---
 net/dccp/ipv4.c |  2 +-
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/ip_forward.c   |  4 ++--
 net/ipv4/ip_fragment.c  | 14 +++---
 net/ipv4/ip_input.c | 20 ++--
 net/ipv4/route.c|  6 +++---
 8 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index ae0e85d018e8..0be0af3017ba 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -187,7 +187,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
   unsigned int len);
 
 #define IP_INC_STATS(net, field)   
SNMP_INC_STATS64((net)->mib.ip_statistics, field)
-#define IP_INC_STATS_BH(net, field)
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
+#define __IP_INC_STATS(net, field) 
SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
 #define IP_ADD_STATS_BH(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 44114a94c576..2d25979273a6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -217,13 +217,13 @@ static int br_validate_ipv4(struct net *net, struct 
sk_buff *skb)
 
len = ntohs(iph->tot_len);
if (skb->len < len) {
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
 
if (pskb_trim_rcsum(skb, len)) {
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
 
@@ -236,7 +236,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff 
*skb)
return 0;
 
 inhdr_error:
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
 drop:
return -1;
 }
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 14e30584e59d..a9c75e79ba99 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, 
struct sock *sk,
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ab69da2d2a77..7ce112aa3a7b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -427,7 +427,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 route_err:
ip_rt_put(rt);
 no_route:
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
 }
 EXPORT_SYMBOL_GPL(inet_csk_route_req);
@@ -466,7 +466,7 @@ route_err:
ip_rt_put(rt);
 no_route:
rcu_read_unlock();
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
 }
 EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index af18f1e4889e..42fbd59b0ba8 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -65,7 +65,7 @@ static int ip_forward_finish(struct net *net, struct sock 
*sk, struct sk_buff *s
 {
struct ip_options *opt  = &(IPCB(skb)->opt);
 
-   IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+   __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
if (unlikely(opt->optlen))
@@ -157,7 +157,7 @@ sr_failed:
 
 too_many_hops:
/* Tell the sender its packet died... */
-   IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
+   __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
 drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index efbd47d1a531..bbe7f72db9c1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -204,14 +204,14 @@ static void ip_expire(unsigned long arg)
goto out;
 
ipq_kill(qp);
-   IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
+   __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 
if (!inet_frag_evictin

[PATCH net-next 08/17] net: sctp: rename SCTP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename SCTP_INC_STATS_BH() to __SCTP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/sctp/sctp.h |  2 +-
 net/sctp/input.c| 12 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 5a2c4c3307a7..5607c009f738 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -206,7 +206,7 @@ extern int sysctl_sctp_wmem[3];
 
 /* SCTP SNMP MIB stats handlers */
 #define SCTP_INC_STATS(net, field)  
SNMP_INC_STATS((net)->sctp.sctp_statistics, field)
-#define SCTP_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field)
+#define __SCTP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field)
 #define SCTP_DEC_STATS(net, field)  
SNMP_DEC_STATS((net)->sctp.sctp_statistics, field)
 
 /* sctp mib definitions */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index f8eca792dbcf..12332fc3eb44 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -84,7 +84,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct 
sk_buff *skb)
 
if (val != cmp) {
/* CRC failure, dump it. */
-   SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS);
+   __SCTP_INC_STATS(net, SCTP_MIB_CHECKSUMERRORS);
return -1;
}
return 0;
@@ -122,7 +122,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
 
-   SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS);
+   __SCTP_INC_STATS(net, SCTP_MIB_INSCTPPACKS);
 
if (skb_linearize(skb))
goto discard_it;
@@ -208,7 +208,7 @@ int sctp_rcv(struct sk_buff *skb)
 */
if (!asoc) {
if (sctp_rcv_ootb(skb)) {
-   SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES);
+   __SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
goto discard_release;
}
}
@@ -264,9 +264,9 @@ int sctp_rcv(struct sk_buff *skb)
skb = NULL; /* sctp_chunk_free already freed the skb */
goto discard_release;
}
-   SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG);
+   __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_BACKLOG);
} else {
-   SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ);
+   __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_SOFTIRQ);
sctp_inq_push(&chunk->rcvr->inqueue, chunk);
}
 
@@ -281,7 +281,7 @@ int sctp_rcv(struct sk_buff *skb)
return 0;
 
 discard_it:
-   SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS);
+   __SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
kfree_skb(skb);
return 0;
 
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 12/17] net: rename IP_UPD_PO_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename IP_UPD_PO_STATS_BH() to __IP_UPD_PO_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h| 2 +-
 net/ipv4/ip_input.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 0df4809bc68a..55f5de50a564 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -191,7 +191,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_ADD_STATS(net, field, val)  
SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
 #define __IP_ADD_STATS(net, field, val) 
SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS_BH(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define __IP_UPD_PO_STATS(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 11f34e421270..8fda63d78435 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -358,9 +358,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, 
struct sk_buff *skb)
 
rt = skb_rtable(skb);
if (rt->rt_type == RTN_MULTICAST) {
-   IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
+   __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
} else if (rt->rt_type == RTN_BROADCAST) {
-   IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
+   __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
} else if (skb->pkt_type == PACKET_BROADCAST ||
   skb->pkt_type == PACKET_MULTICAST) {
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
@@ -409,7 +409,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, 
struct packet_type *pt,
 
 
net = dev_net(dev);
-   IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
+   __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb) {
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 01/17] net: snmp: kill various STATS_USER() helpers

2016-04-27 Thread Eric Dumazet

In the old days (before linux-3.0), SNMP counters were duplicated,
one for user context, and one for BH context.

After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
we have a single copy, and what really matters is preemption being
enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
respectively.

We therefore kill SNMP_INC_STATS_USER(), SNMP_ADD_STATS_USER(),
NET_INC_STATS_USER(), NET_ADD_STATS_USER(), SCTP_INC_STATS_USER(),
SNMP_INC_STATS64_USER(), SNMP_ADD_STATS64_USER(), TCP_ADD_STATS_USER(),
UDP_INC_STATS_USER(), UDP6_INC_STATS_USER(), and XFRM_INC_STATS_USER()

The following patches will rename the _BH helpers to make clear
their usage is not tied to BH being disabled.

Signed-off-by: Eric Dumazet 
---
 include/net/ip.h|  2 --
 include/net/sctp/sctp.h |  1 -
 include/net/snmp.h  | 22 +-
 include/net/tcp.h   |  9 -
 include/net/udp.h   | 14 +++---
 include/net/xfrm.h  |  2 --
 net/ipv4/tcp.c  | 12 ++--
 net/ipv4/udp.c  | 24 
 net/ipv6/udp.c  | 49 -
 net/sctp/chunk.c|  2 +-
 10 files changed, 59 insertions(+), 78 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 93725e546758..ae0e85d018e8 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -194,10 +194,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff 
*skb,
 #define IP_UPD_PO_STATS_BH(net, field, val) 
SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)  
SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
-#define NET_INC_STATS_USER(net, field) 
SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
 #define NET_ADD_STATS(net, field, adnd)
SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
 #define NET_ADD_STATS_BH(net, field, adnd) 
SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
-#define NET_ADD_STATS_USER(net, field, adnd) 
SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3f1c0ff7d4b6..5a2c4c3307a7 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -207,7 +207,6 @@ extern int sysctl_sctp_wmem[3];
 /* SCTP SNMP MIB stats handlers */
 #define SCTP_INC_STATS(net, field)  
SNMP_INC_STATS((net)->sctp.sctp_statistics, field)
 #define SCTP_INC_STATS_BH(net, field)   
SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field)
-#define SCTP_INC_STATS_USER(net, field) 
SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field)
 #define SCTP_DEC_STATS(net, field)  
SNMP_DEC_STATS((net)->sctp.sctp_statistics, field)
 
 /* sctp mib definitions */
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 35512ac6dcfb..56239fc05c51 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -126,9 +126,6 @@ struct linux_xfrm_mib {
 #define SNMP_INC_STATS_BH(mib, field)  \
__this_cpu_inc(mib->mibs[field])
 
-#define SNMP_INC_STATS_USER(mib, field)\
-   this_cpu_inc(mib->mibs[field])
-
 #define SNMP_INC_STATS_ATOMIC_LONG(mib, field) \
atomic_long_inc(&mib->mibs[field])
 
@@ -141,9 +138,6 @@ struct linux_xfrm_mib {
 #define SNMP_ADD_STATS_BH(mib, field, addend)  \
__this_cpu_add(mib->mibs[field], addend)
 
-#define SNMP_ADD_STATS_USER(mib, field, addend)\
-   this_cpu_add(mib->mibs[field], addend)
-
 #define SNMP_ADD_STATS(mib, field, addend) \
this_cpu_add(mib->mibs[field], addend)
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)  \
@@ -170,18 +164,14 @@ struct linux_xfrm_mib {
u64_stats_update_end(&ptr->syncp);  \
} while (0)
 
-#define SNMP_ADD_STATS64_USER(mib, field, addend)  \
+#define SNMP_ADD_STATS64(mib, field, addend)   \
do {\
-   local_bh_disable(); \
+   preempt_disable();  \
SNMP_ADD_STATS64_BH(mib, field, addend);\
-   local_bh_enable();  \
+   preempt_enable();   \
} while (0)
 
-#define SNMP_ADD_STATS64(mib, field, addend)   \
-   SNMP_ADD_STATS64_USER(mib, field, addend)
-
 #define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1)
-#define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1)
 #define SNMP_INC_STATS64(mib, field) SN

[PATCH net-next 04/17] net: udp: rename UDP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename UDP_INC_STATS_BH() to __UDP_INC_STATS(),
and UDP6_INC_STATS_BH() to __UDP6_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/udp.h | 12 ++--
 net/ipv4/udp.c| 46 +++---
 net/ipv6/udp.c| 38 +++---
 net/rxrpc/ar-input.c  |  4 ++--
 net/sunrpc/xprtsock.c |  4 ++--
 5 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 2f37f689d85a..bf6a7c29cf6a 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -292,11 +292,11 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 #define UDP_INC_STATS(net, field, is_udplite)do { \
if (is_udplite) SNMP_INC_STATS((net)->mib.udplite_statistics, field);   
\
elseSNMP_INC_STATS((net)->mib.udp_statistics, field);  }  
while(0)
-#define UDP_INC_STATS_BH(net, field, is_udplite) do { \
+#define __UDP_INC_STATS(net, field, is_udplite)  do { \
if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_statistics, 
field); \
elseSNMP_INC_STATS_BH((net)->mib.udp_statistics, field);
}  while(0)
 
-#define UDP6_INC_STATS_BH(net, field, is_udplite)  do { \
+#define __UDP6_INC_STATS(net, field, is_udplite)   do { \
if (is_udplite) SNMP_INC_STATS_BH((net)->mib.udplite_stats_in6, field);\
elseSNMP_INC_STATS_BH((net)->mib.udp_stats_in6, field);  \
 } while(0)
@@ -306,15 +306,15 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 } while(0)
 
 #if IS_ENABLED(CONFIG_IPV6)
-#define UDPX_INC_STATS_BH(sk, field)   \
+#define __UDPX_INC_STATS(sk, field)\
 do {   \
if ((sk)->sk_family == AF_INET) \
-   UDP_INC_STATS_BH(sock_net(sk), field, 0);   \
+   __UDP_INC_STATS(sock_net(sk), field, 0);\
else\
-   UDP6_INC_STATS_BH(sock_net(sk), field, 0);  \
+   __UDP6_INC_STATS(sock_net(sk), field, 0);   \
 } while (0)
 #else
-#define UDPX_INC_STATS_BH(sk, field) UDP_INC_STATS_BH(sock_net(sk), field, 0)
+#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0)
 #endif
 
 /* /proc */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6b004b838966..093284c5c03b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1242,10 +1242,10 @@ static unsigned int first_packet_length(struct sock *sk)
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL &&
udp_lib_checksum_complete(skb)) {
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS,
-IS_UDPLITE(sk));
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
-IS_UDPLITE(sk));
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+   IS_UDPLITE(sk));
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+   IS_UDPLITE(sk));
atomic_inc(&sk->sk_drops);
__skb_unlink(skb, rcvq);
__skb_queue_tail(&list_kill, skb);
@@ -1514,9 +1514,9 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct 
sk_buff *skb)
 
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
-is_udplite);
-   UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
+   is_udplite);
+   __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
@@ -1580,9 +1580,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff 
*skb)
 
ret = encap_rcv(sk, skb);
if (ret <= 0) {
-   UDP_INC_STATS_BH(sock_net(sk),
-UDP_MIB_INDATAGRAMS,
-is_udplite);
+   __UDP_INC_STATS(sock_net(sk),
+   UDP_MIB_INDATAGRAMS,
+   is_udplite);
return -ret;
}
}
@@ -1633,8 +1633,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff 
*skb)
 
udp_csum_pull_header(skb);
if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
-

[PATCH net-next 05/17] net: xfrm: kill XFRM_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Not used anymore.

Signed-off-by: Eric Dumazet 
---
 include/net/xfrm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dab9e1b82963..adfebd6f243c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -45,10 +45,8 @@
 
 #ifdef CONFIG_XFRM_STATISTICS
 #define XFRM_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
-#define XFRM_INC_STATS_BH(net, field)  
SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field)
 #else
 #define XFRM_INC_STATS(net, field) ((void)(net))
-#define XFRM_INC_STATS_BH(net, field)  ((void)(net))
 #endif
 
 
-- 
2.8.0.rc3.226.g39d4020

[PATCH net-next 03/17] net: rename ICMP_INC_STATS_BH()

2016-04-27 Thread Eric Dumazet

Rename ICMP_INC_STATS_BH() to __ICMP_INC_STATS()

Signed-off-by: Eric Dumazet 
---
 include/net/icmp.h  |  2 +-
 net/dccp/ipv4.c |  4 ++--
 net/ipv4/icmp.c | 16 
 net/ipv4/tcp_ipv4.c |  2 +-
 net/ipv4/udp.c  |  2 +-
 net/sctp/input.c|  2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 970028e13382..5a60ce819078 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -30,7 +30,7 @@ struct icmp_err {
 
 extern const struct icmp_err icmp_err_convert[];
 #define ICMP_INC_STATS(net, field) 
SNMP_INC_STATS((net)->mib.icmp_statistics, field)
-#define ICMP_INC_STATS_BH(net, field)  
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
+#define __ICMP_INC_STATS(net, field)   
SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field)
 #define ICMPMSGOUT_INC_STATS(net, field)   
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256)
 #define ICMPMSGIN_INC_STATS_BH(net, field) 
SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field)
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4b78067669d6..14e30584e59d 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -247,7 +247,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 
if (skb->len < offset + sizeof(*dh) ||
skb->len < offset + __dccp_basic_hdr_len(dh)) {
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
 
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
   iph->saddr, ntohs(dh->dccph_sport),
   inet_iif(skb));
if (!sk) {
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 6333489771ed..995fef9c5099 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -363,7 +363,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
   icmp_param->data_len+icmp_param->head_len,
   icmp_param->head_len,
   ipc, rt, MSG_DONTWAIT) < 0) {
-   ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
+   __ICMP_INC_STATS(sock_net(sk), ICMP_MIB_OUTERRORS);
ip_flush_pending_frames(sk);
} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
struct icmphdr *icmph = icmp_hdr(skb);
@@ -744,7 +744,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 
info)
 * avoid additional coding at protocol handlers.
 */
if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
-   ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
return;
}
 
@@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb)
 out:
return true;
 out_err:
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return false;
 }
 
@@ -877,7 +877,7 @@ out_err:
 static bool icmp_redirect(struct sk_buff *skb)
 {
if (skb->len < sizeof(struct iphdr)) {
-   ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
return false;
}
 
@@ -956,7 +956,7 @@ static bool icmp_timestamp(struct sk_buff *skb)
return true;
 
 out_err:
-   ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
return false;
 }
 
@@ -996,7 +996,7 @@ int icmp_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
 
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INMSGS);
 
if (skb_checksum_simple_validate(skb))
goto csum_error;
@@ -1052,9 +1052,9 @@ drop:
kfree_skb(skb);
return 0;
 csum_error:
-   ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
 error:
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
goto drop;
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d2a5763e5abc..ebd8f3b9e61b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -372,7 +372,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
   th->dest, iph->saddr, ntohs(th->source),
   inet_iif(icmp_skb));
if (!sk) {
-   ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
+   __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state == TCP_TIME_WAIT) {

[PATCH net-next 00/17] net: snmp: update SNMP methods

2016-04-27 Thread Eric Dumazet

In the old days (before linux-3.0), SNMP counters were duplicated,
one set for user context, and another one for BH context.

After commit 8f0ea0fe3a03 ("snmp: reduce percpu needs by 50%")
we have a single copy, and what really matters is preemption being
enabled or disabled, since we use this_cpu_inc() or __this_cpu_inc()
respectively.

This patch series kills the obsolete STATS_USER() helpers,
and renames all XXX_BH() helpers to __XXX() ones, to more
closely match conventions used to update per cpu variables.

This is probably going to hurt maintainers' job for a while,
since cherry-picks will not be clean, but this had to be
cleaned at one point. I am so sorry guys.

Eric Dumazet (17):
  net: snmp: kill various STATS_USER() helpers
  dccp: rename DCCP_INC_STATS_BH()
  net: rename ICMP_INC_STATS_BH()
  net: udp: rename UDP_INC_STATS_BH()
  net: xfrm: kill XFRM_INC_STATS_BH()
  net: tcp: rename TCP_INC_STATS_BH
  net: icmp: rename ICMPMSGIN_INC_STATS_BH()
  net: sctp: rename SCTP_INC_STATS_BH()
  net: rename IP_INC_STATS_BH()
  net: rename ICMP6_INC_STATS_BH()
  net: rename IP_ADD_STATS_BH()
  net: rename IP_UPD_PO_STATS_BH()
  net: rename NET_{ADD|INC}_STATS_BH()
  ipv6: rename IP6_INC_STATS_BH()
  ipv6: rename IP6_UPD_PO_STATS_BH()
  ipv6: kill ICMP6MSGIN_INC_STATS_BH()
  net: snmp: kill STATS_BH macros

 include/net/icmp.h  |   4 +-
 include/net/ip.h|  12 ++---
 include/net/ipv6.h  |  48 +-
 include/net/sctp/sctp.h |   7 ++-
 include/net/snmp.h  |  44 ++--
 include/net/tcp.h   |  15 +++---
 include/net/udp.h   |  34 ++---
 include/net/xfrm.h  |   4 --
 net/bridge/br_netfilter_hooks.c |   6 +--
 net/bridge/br_netfilter_ipv6.c  |  10 ++--
 net/core/dev.c  |   4 +-
 net/dccp/dccp.h |   6 +--
 net/dccp/input.c|   2 +-
 net/dccp/ipv4.c |  24 -
 net/dccp/ipv6.c |  24 -
 net/dccp/minisocks.c|   2 +-
 net/dccp/options.c  |   2 +-
 net/dccp/timer.c|   8 +--
 net/ipv4/arp.c  |   2 +-
 net/ipv4/icmp.c |  18 +++
 net/ipv4/inet_connection_sock.c |   4 +-
 net/ipv4/inet_hashtables.c  |   2 +-
 net/ipv4/inet_timewait_sock.c   |   4 +-
 net/ipv4/ip_forward.c   |   6 +--
 net/ipv4/ip_fragment.c  |  14 +++---
 net/ipv4/ip_input.c |  34 ++---
 net/ipv4/route.c|   6 +--
 net/ipv4/syncookies.c   |   4 +-
 net/ipv4/tcp.c  |  18 +++
 net/ipv4/tcp_cdg.c  |  20 
 net/ipv4/tcp_cubic.c|  20 
 net/ipv4/tcp_fastopen.c |  14 +++---
 net/ipv4/tcp_input.c| 108 +---
 net/ipv4/tcp_ipv4.c |  40 +++
 net/ipv4/tcp_minisocks.c|  14 +++---
 net/ipv4/tcp_output.c   |  18 +++
 net/ipv4/tcp_recovery.c |   4 +-
 net/ipv4/tcp_timer.c|  22 
 net/ipv4/udp.c  |  72 +--
 net/ipv6/exthdrs.c  |  66 
 net/ipv6/icmp.c |  12 ++---
 net/ipv6/inet6_hashtables.c |   2 +-
 net/ipv6/ip6_input.c|  32 ++--
 net/ipv6/ip6_output.c   |  34 ++---
 net/ipv6/ip6mr.c|   8 +--
 net/ipv6/reassembly.c   |  32 ++--
 net/ipv6/syncookies.c   |   4 +-
 net/ipv6/tcp_ipv6.c |  34 ++---
 net/ipv6/udp.c  |  91 +
 net/rxrpc/ar-input.c|   4 +-
 net/sctp/chunk.c|   2 +-
 net/sctp/input.c|  16 +++---
 net/sctp/ipv6.c |   2 +-
 net/sunrpc/xprtsock.c   |   4 +-
 54 files changed, 512 insertions(+), 531 deletions(-)

-- 
2.8.0.rc3.226.g39d4020

Re: [PATCH net-next v2 0/7] net: unify dst caching for tunnel devices

2016-04-27 Thread Eric Dumazet

On Tue, 2016-02-16 at 20:22 -0500, David Miller wrote:
> From: Paolo Abeni 
> Date: Fri, 12 Feb 2016 15:43:52 +0100
> 
> > This patch series try to unify the dst cache implementations currently
> > present in the kernel, namely in ip_tunnel.c and ip6_tunnel.c, introducing a
> > new generic implementation, replacing the existing ones, and then using
> > the new implementation in other tunnel devices which currently lack it.
> > 
> > The new dst implementation is compiled, as built-in, only if any device 
> > using
> > it is enabled.
> > 
> > Caching the dst for the tunnel remote address gives small, but measurable,
> > performance improvement when tunneling over ipv4 (in the 2%-4% range) and
> > significant ones when tunneling over ipv6 (roughly 60% when no
> > fragmentation/segmentation take place and the tunnel local address
> > is not specified).
> > 
> > v2:
> > - move the vxlan dst_cache usage inside the device lookup functions
> > - fix usage after free for lwt tunnel moving the dst cache storage inside
> >   the dst_metadata,
> > - sparse codying style cleanup
> 
> Series applied, thanks for doing this work as it is a major improvement.

Paolo, please check following warning :

This might be caused by e09acddf873bf775b208b452a4c3a3fd26fa9427
("ip_tunnel: replace dst_cache with generic implementation")


[   73.982267] BUG: using smp_processor_id() in preemptible [] code: 
ip/10604
[   73.990978] caller is debug_smp_processor_id+0x17/0x20
[   73.990981] CPU: 26 PID: 10604 Comm: ip Not tainted 4.6.0-dbx-DEV #1075
[   73.990982] Hardware name: ...
[   73.990983]   881fc11d3b98 8140a51f 
001a
[   73.990987]  81a585c5 881fc11d3bc8 8142700f 
60bfa000e0c0
[   73.990989]  881fcb6b0f00 9807f60a 881fcb6b0f00 
881fc11d3bd8
[   73.990992] Call Trace:
[   73.990996]  [] dump_stack+0x67/0x98
[   73.990998]  [] check_preemption_disabled+0xef/0x100
[   73.991000]  [] debug_smp_processor_id+0x17/0x20
[   73.991003]  [] dst_cache_set_ip4+0x2c/0x70
[   73.991006]  [] ip_tunnel_bind_dev+0x101/0x170
[   73.991008]  [] ip_tunnel_ioctl+0x330/0x430
[   73.991010]  [] ? ip_tunnel_ioctl+0x5/0x430
[   73.991012]  [] ipgre_tunnel_ioctl+0xdb/0x160
[   73.991015]  [] ? rtnl_lock+0x17/0x20
[   73.991017]  [] dev_ifsioc+0x325/0x370
[   73.991018]  [] dev_ioctl+0xd2/0x630
[   73.991022]  [] sock_ioctl+0xd3/0x270
[   73.991025]  [] do_vfs_ioctl+0x93/0x6f0
[   73.991026]  [] ? sock_alloc_file+0x91/0x120
[   73.991029]  [] ? __fget_light+0x6c/0x90
[   73.991031]  [] SyS_ioctl+0x8b/0xa0
[   73.991042]  [] entry_SYSCALL_64_fastpath+0x18/0xa8

Thanks

Re: [RFC 12/20] net: dsa: rename dst->ds to dst->switches

2016-04-27 Thread Andrew Lunn

On Wed, Apr 27, 2016 at 06:30:09PM -0400, Vivien Didelot wrote:
> dsa_switch stores the net_device pointers in a "ports" member. Be
> consistent and store the dsa_switch pointer in a "switches" member of
> the dsa_switch_tree structure.
> 
> This free us the "ds" member for a future dsa_switch list.

NACK.

Or you need to change ds absolutely everywhere, in all drivers and
APIs. We cannot have ds meaning two different things.

   Andrew

> 
> Signed-off-by: Vivien Didelot 
> ---
>  include/net/dsa.h | 2 +-
>  net/dsa/dsa.c | 8 
>  net/dsa/tag_brcm.c| 2 +-
>  net/dsa/tag_dsa.c | 2 +-
>  net/dsa/tag_edsa.c| 2 +-
>  net/dsa/tag_trailer.c | 2 +-
>  6 files changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 5f2e7df..389227d 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -124,7 +124,7 @@ struct dsa_switch_tree {
>   /*
>* Data for the individual switch chips.
>*/
> - struct dsa_switch   *ds[DSA_MAX_SWITCHES];
> + struct dsa_switch   *switches[DSA_MAX_SWITCHES];
>  };
>  
>  struct dsa_port {
> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
> index 3daffb6..aa4a61a 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -857,7 +857,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
> struct net_device *dev,
>   continue;
>   }
>  
> - dst->ds[i] = ds;
> + dst->switches[i] = ds;
>  
>   ++configured;
>   }
> @@ -953,7 +953,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
>   wmb();
>  
>   for (i = 0; i < dst->pd->nr_chips; i++) {
> - struct dsa_switch *ds = dst->ds[i];
> + struct dsa_switch *ds = dst->switches[i];
>  
>   if (ds)
>   dsa_switch_destroy(ds);
> @@ -1006,7 +1006,7 @@ static int dsa_suspend(struct device *d)
>   int i, ret = 0;
>  
>   for (i = 0; i < dst->pd->nr_chips; i++) {
> - struct dsa_switch *ds = dst->ds[i];
> + struct dsa_switch *ds = dst->switches[i];
>  
>   if (ds != NULL)
>   ret = dsa_switch_suspend(ds);
> @@ -1022,7 +1022,7 @@ static int dsa_resume(struct device *d)
>   int i, ret = 0;
>  
>   for (i = 0; i < dst->pd->nr_chips; i++) {
> - struct dsa_switch *ds = dst->ds[i];
> + struct dsa_switch *ds = dst->switches[i];
>  
>   if (ds != NULL)
>   ret = dsa_switch_resume(ds);
> diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
> index 3d5aabc..35fc75b 100644
> --- a/net/dsa/tag_brcm.c
> +++ b/net/dsa/tag_brcm.c
> @@ -102,7 +102,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct 
> net_device *dev,
>   if (unlikely(dst == NULL))
>   goto out_drop;
>  
> - ds = dst->ds[0];
> + ds = dst->switches[0];
>  
>   skb = skb_unshare(skb, GFP_ATOMIC);
>   if (skb == NULL)
> diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
> index c870cfa..bf3eebf8 100644
> --- a/net/dsa/tag_dsa.c
> +++ b/net/dsa/tag_dsa.c
> @@ -109,7 +109,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device 
> *dev,
>*/
>   if (source_device >= dst->pd->nr_chips)
>   goto out_drop;
> - ds = dst->ds[source_device];
> + ds = dst->switches[source_device];
>   if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
>   goto out_drop;
>  
> diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
> index 898f949d..4ddbb85 100644
> --- a/net/dsa/tag_edsa.c
> +++ b/net/dsa/tag_edsa.c
> @@ -122,7 +122,7 @@ static int edsa_rcv(struct sk_buff *skb, struct 
> net_device *dev,
>*/
>   if (source_device >= dst->pd->nr_chips)
>   goto out_drop;
> - ds = dst->ds[source_device];
> + ds = dst->switches[source_device];
>   if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
>   goto out_drop;
>  
> diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
> index eaa3440..ade0bbf 100644
> --- a/net/dsa/tag_trailer.c
> +++ b/net/dsa/tag_trailer.c
> @@ -67,7 +67,7 @@ static int trailer_rcv(struct sk_buff *skb, struct 
> net_device *dev,
>  
>   if (unlikely(dst == NULL))
>   goto out_drop;
> - ds = dst->ds[0];
> + ds = dst->switches[0];
>  
>   skb = skb_unshare(skb, GFP_ATOMIC);
>   if (skb == NULL)
> -- 
> 2.8.0
>

Re: [RFC 07/20] net: dsa: list ports in switch

2016-04-27 Thread Andrew Lunn

On Wed, Apr 27, 2016 at 06:30:04PM -0400, Vivien Didelot wrote:
> List DSA port structures in their switch structure, so that drivers can
> iterate on them to retrieve information such as their ports membership.

And this would be so much easier using a plain array.

Andrew

> 
> Signed-off-by: Vivien Didelot 
> ---
>  include/net/dsa.h | 9 +
>  net/dsa/dsa.c | 4 
>  2 files changed, 13 insertions(+)
> 
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 69e467c..5f2e7df 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -32,6 +32,11 @@ enum dsa_tag_protocol {
>  #define DSA_MAX_SWITCHES 4
>  #define DSA_MAX_PORTS12
>  
> +#define dsa_switch_for_each_port(_ds, _dp, _num_ports)   
> \
> + for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list);  \
> +  &_dp->list != (&_ds->dp) && _dp->port < _num_ports;\
> +  _dp = list_next_entry(_dp, list))
> +
>  struct dsa_chip_data {
>   /*
>* How to access the switch configuration registers.
> @@ -123,6 +128,8 @@ struct dsa_switch_tree {
>  };
>  
>  struct dsa_port {
> + struct list_headlist;
> +
>   struct dsa_switch   *ds;
>   int port;
>  
> @@ -173,6 +180,8 @@ struct dsa_switch {
>   u32 phys_mii_mask;
>   struct mii_bus  *slave_mii_bus;
>   struct net_device   *ports[DSA_MAX_PORTS];
> +
> + struct list_headdp;
>  };
>  
>  static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
> index 222494c..3daffb6 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -225,6 +225,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
> struct device *parent)
>   int index = ds->index;
>   int i, ret;
>  
> + INIT_LIST_HEAD(&ds->dp);
> +
>   /*
>* Validate supplied switch configuration.
>*/
> @@ -238,6 +240,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
> struct device *parent)
>   dp[i]->ds = ds;
>   dp[i]->port = i;
>  
> + list_add_tail(&dp[i]->list, &ds->dp);
> +
>   name = pd->port_names[i];
>   if (name == NULL)
>   continue;
> -- 
> 2.8.0
>

Re: [RFC 03/20] net: dsa: pass dsa_port down to drivers bridge ops

2016-04-27 Thread Andrew Lunn

On Wed, Apr 27, 2016 at 06:30:00PM -0400, Vivien Didelot wrote:
> Now that DSA as proper structure for DSA ports, pass it down to the
> port_bridge_join and port_bridge_leave driver functions.

I should look at the later patches, but this looks like a step
backwards.

If your ports array is a member of ds, you have no need for this patch
at all.

What advantage does this change bring?

  Andrew

> 
> Signed-off-by: Vivien Didelot 
> ---
>  drivers/net/dsa/bcm_sf2.c   | 28 ++--
>  drivers/net/dsa/mv88e6xxx.c | 10 +-
>  drivers/net/dsa/mv88e6xxx.h |  4 ++--
>  include/net/dsa.h   |  4 ++--
>  net/dsa/slave.c |  4 ++--
>  5 files changed, 25 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
> index f394ea9..2d7b297 100644
> --- a/drivers/net/dsa/bcm_sf2.c
> +++ b/drivers/net/dsa/bcm_sf2.c
> @@ -491,15 +491,15 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch  
> *ds, int port)
>   return 0;
>  }
>  
> -static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port,
> +static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp,
> struct net_device *bridge)
>  {
>   struct bcm_sf2_priv *priv = ds_to_priv(ds);
>   unsigned int i;
>   u32 reg, p_ctl;
>  
> - priv->port_sts[port].bridge_dev = bridge;
> - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
> + priv->port_sts[dp->port].bridge_dev = bridge;
> + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
>  
>   for (i = 0; i < priv->hw_params.num_ports; i++) {
>   if (priv->port_sts[i].bridge_dev != bridge)
> @@ -509,7 +509,7 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
> port,
>* membership and update the remote port bitmask
>*/
>   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
> - reg |= 1 << port;
> + reg |= 1 << dp->port;
>   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
>   priv->port_sts[i].vlan_ctl_mask = reg;
>  
> @@ -519,20 +519,20 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, 
> int port,
>   /* Configure the local port VLAN control membership to include
>* remote ports and update the local port bitmask
>*/
> - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
> - priv->port_sts[port].vlan_ctl_mask = p_ctl;
> + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
> + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
>  
>   return 0;
>  }
>  
> -static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port,
> +static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp,
>   struct net_device *bridge)
>  {
>   struct bcm_sf2_priv *priv = ds_to_priv(ds);
>   unsigned int i;
>   u32 reg, p_ctl;
>  
> - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
> + p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
>  
>   for (i = 0; i < priv->hw_params.num_ports; i++) {
>   /* Don't touch the remaining ports */
> @@ -540,18 +540,18 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
> int port,
>   continue;
>  
>   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
> - reg &= ~(1 << port);
> + reg &= ~(1 << dp->port);
>   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
> - priv->port_sts[port].vlan_ctl_mask = reg;
> + priv->port_sts[dp->port].vlan_ctl_mask = reg;
>  
>   /* Prevent self removal to preserve isolation */
> - if (port != i)
> + if (dp->port != i)
>   p_ctl &= ~(1 << i);
>   }
>  
> - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
> - priv->port_sts[port].vlan_ctl_mask = p_ctl;
> - priv->port_sts[port].bridge_dev = NULL;
> + core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
> + priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
> + priv->port_sts[dp->port].bridge_dev = NULL;
>  }
>  
>  static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
> diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
> index 86f8f2f..3f78c73 100644
> --- a/drivers/net/dsa/mv88e6xxx.c
> +++ b/drivers/net/dsa/mv88e6xxx.c
> @@ -2203,7 +2203,7 @@ unlock:
>   return err;
>  }
>  
> -int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
> +int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
>  struct net_device *bridge)
>  {
>   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
> @@ -2212,7 +2212,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
> int port,
>   mutex_lock(&ps->smi_mutex);
>  
>   /* Assign the bridge and remap each port's VLANTable */
> -

Re: [RFC 01/20] net: dsa: introduce a dsa_port structure

2016-04-27 Thread Andrew Lunn

> @@ -230,6 +231,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
> struct device *parent)
>   for (i = 0; i < DSA_MAX_PORTS; i++) {
>   char *name;
>  
> + dp[i] = devm_kzalloc(parent, sizeof(*dp), GFP_KERNEL);
> + if (dp[i] == NULL)
> + return -ENOMEM;

You are not saving anything here by dynamically allocating the memory,
since you do it for all ports. So just make it a member of ds with
size DSA_MAX_PORTS. I would then call this array structure ports.

Humm, i also think keeping it in dsa_slave_priv is wrong, if you have
defined the structure in the global include/net/dsa.h. dsa_switch is a
better place for it.

 Andrew

[RFC 00/20] net: dsa: dsa_port structure and tree-wide ops

2016-04-27 Thread Vivien Didelot

In a previous RFC [1], I introduced the need to implement cross-chip operations
in the DSA layer.

Here's a summary. In a multiple switches setup such as the following, every
switch of the tree must be aware of its configuration in order to configure a
correct data path between chips.

        sw0             sw1             sw2
  [ 0 1 2 3 4 5 ] [ 0 1 2 3 4 5 ] [ 0 1 2 3 4 5 ]
    |   '     ^     ^         ^     ^     '
    v   '     |     |         |     |     '
   CPU  '     `-DSA-'         `-DSA-'     '
        '                                 '
        + - - - - - - - br0 - - - - - - - +

For instance, bridging sw0p2 and sw2p3 together in a VLAN 42 requires both
chips to allow frames from the external port to egress its internal port, all
DSA ports between them must learn their address, and sw1 must also be aware of
the VLAN 42 in order to allow tagged packets to cross the chip.

To implement all that nicely, we need a way to propagate such notification to
every switch of a DSA tree.

The patchset introduces a dsa_port structure to bundle port-centric info such
as its switch index, port number, bridge device, and changes the DSA driver
functions to take such a structure as a parameter instead of an internal port
number.

The DSA layer then introduces tree-wide operations, which call every switch
driver when a port operation occurs. It is the responsibility of a switch
driver to check if the related port is internal or external to its chip, and
behave accordingly.

See the patchset as different logical groups (that may be split later):

  * patches 1 to 5: introduce the dsa_port structure to DSA drivers

  * patches 6 to 11: put the bridge device in the dsa_port structure and allow
the DSA drivers to get rid of their private bridge_dev pointer

  * patches 12 to 16: introduce tree-wide operations. Drivers are now aware of
cross-chip port operations

  * patches 17 to 20: implement cross-chip hardware bridging in mv88e6xxx

A branch is available here [2] and a debugfs patch is maintained here [3] in
order to inspect the Marvell switch's internal structures, such as the PVT.

Many things remain to be done after this, such as using dsa_port_is_{cpu,dsa}
helpers, getting rid of dst->switches and ds->ports in favor of their related
switch and port lists, and introducing a dynamic number of switches and ports.

[1] https://lkml.org/lkml/2016/4/20/733
[2] https://github.com/vivien/linux/tree/dsa/dev
[3] 
https://github.com/vivien/linux/commit/da33b1a698fef3a66515a05e2b9f31d0279a89d4.patch

Cheers,

Vivien Didelot (20):
  net: dsa: introduce a dsa_port structure
  net: dsa: be consistent with NETDEV_CHANGEUPPER
  net: dsa: pass dsa_port down to drivers bridge ops
  net: dsa: pass dsa_port down to drivers FDB ops
  net: dsa: pass dsa_port down to drivers VLAN ops
  net: dsa: move bridge device in dsa_port
  net: dsa: list ports in switch
  net: dsa: bcm_sf2: use bridge device from dsa_port
  net: dsa: mv88e6xxx: check HW vlan with dsa_port
  net: dsa: mv88e6xxx: setup a dsa_port
  net: dsa: mv88e6xxx: use bridge from dsa_port
  net: dsa: rename dst->ds to dst->switches
  net: dsa: list switches in tree
  net: dsa: add tree-wide bridge ops
  net: dsa: add tree-wide FDB ops
  net: dsa: add tree-wide VLAN ops
  net: dsa: mv88e6xxx: factorize port bridge change
  net: dsa: mv88e6xxx: add flags to info
  net: dsa: mv88e6xxx: conditionally init PVT
  net: dsa: mv88e6xxx: setup PVT on cross-chip ops

 drivers/net/dsa/bcm_sf2.c   |  92 +-
 drivers/net/dsa/bcm_sf2.h   |   2 -
 drivers/net/dsa/mv88e6352.c |   1 +
 drivers/net/dsa/mv88e6xxx.c | 397 
 drivers/net/dsa/mv88e6xxx.h |  41 +++--
 include/net/dsa.h   |  57 +--
 net/dsa/Makefile|   2 +-
 net/dsa/dsa.c   |  25 ++-
 net/dsa/dsa_priv.h  |  37 +++--
 net/dsa/slave.c | 283 +--
 net/dsa/tag_brcm.c  |   6 +-
 net/dsa/tag_dsa.c   |  10 +-
 net/dsa/tag_edsa.c  |  10 +-
 net/dsa/tag_trailer.c   |   4 +-
 net/dsa/tree.c  | 187 +
 15 files changed, 751 insertions(+), 403 deletions(-)
 create mode 100644 net/dsa/tree.c

-- 
2.8.0

[RFC 05/20] net: dsa: pass dsa_port down to drivers VLAN ops

2016-04-27 Thread Vivien Didelot

Now that DSA has a proper structure for DSA ports, pass it down to the
port_vlan_{filtering,prepare,add,del,dump} driver functions.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 41 +
 drivers/net/dsa/mv88e6xxx.h | 10 +-
 include/net/dsa.h   | 11 ++-
 net/dsa/slave.c | 10 +-
 4 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index c1ff763..7e03f4c 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1367,7 +1367,7 @@ static int _mv88e6xxx_vtu_getnext(struct dsa_switch *ds,
return 0;
 }
 
-int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, struct dsa_port *dp,
 struct switchdev_obj_port_vlan *vlan,
 int (*cb)(struct switchdev_obj *obj))
 {
@@ -1378,7 +1378,7 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int 
port,
 
mutex_lock(&ps->smi_mutex);
 
-   err = _mv88e6xxx_port_pvid_get(ds, port, &pvid);
+   err = _mv88e6xxx_port_pvid_get(ds, dp->port, &pvid);
if (err)
goto unlock;
 
@@ -1394,14 +1394,15 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int 
port,
if (!next.valid)
break;
 
-   if (next.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
+   if (next.data[dp->port] ==
+   GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
continue;
 
/* reinit and dump this VLAN obj */
vlan->vid_begin = vlan->vid_end = next.vid;
vlan->flags = 0;
 
-   if (next.data[port] == GLOBAL_VTU_DATA_MEMBER_TAG_UNTAGGED)
+   if (next.data[dp->port] == GLOBAL_VTU_DATA_MEMBER_TAG_UNTAGGED)
vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
 
if (next.vid == pvid)
@@ -1789,7 +1790,7 @@ static const char * const 
mv88e6xxx_port_8021q_mode_names[] = {
[PORT_CONTROL_2_8021Q_SECURE] = "Secure",
 };
 
-int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, struct dsa_port *dp,
  bool vlan_filtering)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -1799,7 +1800,7 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, 
int port,
 
mutex_lock(&ps->smi_mutex);
 
-   ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL_2);
+   ret = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port), PORT_CONTROL_2);
if (ret < 0)
goto unlock;
 
@@ -1809,12 +1810,12 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch 
*ds, int port,
ret &= ~PORT_CONTROL_2_8021Q_MASK;
ret |= new & PORT_CONTROL_2_8021Q_MASK;
 
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_2,
-  ret);
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port),
+  PORT_CONTROL_2, ret);
if (ret < 0)
goto unlock;
 
-   netdev_dbg(ds->ports[port], "802.1Q Mode %s (was %s)\n",
+   netdev_dbg(ds->ports[dp->port], "802.1Q Mode %s (was %s)\n",
   mv88e6xxx_port_8021q_mode_names[new],
   mv88e6xxx_port_8021q_mode_names[old]);
}
@@ -1826,7 +1827,7 @@ unlock:
return ret;
 }
 
-int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans)
 {
@@ -1835,7 +1836,7 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, 
int port,
/* If the requested port doesn't belong to the same bridge as the VLAN
 * members, do not support it (yet) and fallback to software VLAN.
 */
-   err = mv88e6xxx_port_check_hw_vlan(ds, port, vlan->vid_begin,
+   err = mv88e6xxx_port_check_hw_vlan(ds, dp->port, vlan->vid_begin,
   vlan->vid_end);
if (err)
return err;
@@ -1863,7 +1864,7 @@ static int _mv88e6xxx_port_vlan_add(struct dsa_switch 
*ds, int port, u16 vid,
return _mv88e6xxx_vtu_loadpurge(ds, &vlan);
 }
 
-void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
+void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, struct dsa_port *dp,
 const struct switchdev_obj_port_vlan *vlan,
 struct switchdev_trans *trans)
 {
@@ -1875,12 +1876,12 @@ void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int 
port,
mutex_lock(&ps->smi_mutex);

[RFC 01/20] net: dsa: introduce a dsa_port structure

2016-04-27 Thread Vivien Didelot

Introduce a new dsa_port structure, used to store port-centric
information, such as a pointer to its DSA switch and its port number.
It will later contain further data, such as its bridge device.

This is a first step towards implementing cross-chip port operations.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h |   5 ++
 net/dsa/dsa.c |  10 +++-
 net/dsa/dsa_priv.h|  13 ++---
 net/dsa/slave.c   | 147 +-
 net/dsa/tag_brcm.c|   4 +-
 net/dsa/tag_dsa.c |   8 +--
 net/dsa/tag_edsa.c|   8 +--
 net/dsa/tag_trailer.c |   2 +-
 8 files changed, 104 insertions(+), 93 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2d280ab..255c108 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -122,6 +122,11 @@ struct dsa_switch_tree {
struct dsa_switch   *ds[DSA_MAX_SWITCHES];
 };
 
+struct dsa_port {
+   struct dsa_switch   *ds;
+   int port;
+};
+
 struct dsa_switch {
/*
 * Parent switch tree, and switch index.
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index d61ceed..222494c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -219,6 +219,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
 {
struct dsa_switch_driver *drv = ds->drv;
struct dsa_switch_tree *dst = ds->dst;
+   struct dsa_port *dp[DSA_MAX_PORTS];
struct dsa_chip_data *pd = ds->pd;
bool valid_name_found = false;
int index = ds->index;
@@ -230,6 +231,13 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
for (i = 0; i < DSA_MAX_PORTS; i++) {
char *name;
 
+   dp[i] = devm_kzalloc(parent, sizeof(*dp), GFP_KERNEL);
+   if (dp[i] == NULL)
+   return -ENOMEM;
+
+   dp[i]->ds = ds;
+   dp[i]->port = i;
+
name = pd->port_names[i];
if (name == NULL)
continue;
@@ -328,7 +336,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
if (!(ds->enabled_port_mask & (1 << i)))
continue;
 
-   ret = dsa_slave_create(ds, parent, i, pd->port_names[i]);
+   ret = dsa_slave_create(dp[i], parent, pd->port_names[i]);
if (ret < 0) {
netdev_err(dst->master_netdev, "[%d]: can't create dsa 
slave device for port %d(%s): %d\n",
   index, i, pd->port_names[i], ret);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index dfa3377..c7d5df0 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -26,13 +26,6 @@ struct dsa_slave_priv {
struct net_device *dev);
 
/*
-* Which switch this port is a part of, and the port index
-* for this port.
-*/
-   struct dsa_switch   *parent;
-   u8  port;
-
-   /*
 * The phylib phy_device pointer for the PHY connected
 * to this port.
 */
@@ -46,6 +39,9 @@ struct dsa_slave_priv {
 #ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll  *netpoll;
 #endif
+
+   /* DSA specific data */
+   struct dsa_port *dp;
 };
 
 /* dsa.c */
@@ -54,8 +50,7 @@ extern char dsa_driver_version[];
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
-int port, char *name);
+int dsa_slave_create(struct dsa_port *dp, struct device *parent, char *name);
 void dsa_slave_destroy(struct net_device *slave_dev);
 int dsa_slave_suspend(struct net_device *slave_dev);
 int dsa_slave_resume(struct net_device *slave_dev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3b6750f..6115444 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -61,7 +61,7 @@ static int dsa_slave_get_iflink(const struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
 
-   return p->parent->dst->master_netdev->ifindex;
+   return p->dp->ds->dst->master_netdev->ifindex;
 }
 
 static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p)
@@ -72,8 +72,8 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv 
*p)
 static int dsa_slave_open(struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
-   struct net_device *master = p->parent->dst->master_netdev;
-   struct dsa_switch *ds = p->parent;
+   struct dsa_switch *ds = p->dp->ds;
+   struct net_device *master = ds->dst->master_netdev;
u8 stp_state = dsa_port_is_bridged(p) ?
BR_STATE_BLOCKING : BR_STATE_FORWARDING;
int err;
@@ -99,13 +99,13 @@ static int dsa_slave_open(struct net_device *dev)
}
 
if (ds->drv->port_enable) {

[RFC 03/20] net: dsa: pass dsa_port down to drivers bridge ops

2016-04-27 Thread Vivien Didelot

Now that DSA has a proper structure for DSA ports, pass it down to the
port_bridge_join and port_bridge_leave driver functions.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   | 28 ++--
 drivers/net/dsa/mv88e6xxx.c | 10 +-
 drivers/net/dsa/mv88e6xxx.h |  4 ++--
 include/net/dsa.h   |  4 ++--
 net/dsa/slave.c |  4 ++--
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index f394ea9..2d7b297 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -491,15 +491,15 @@ static int bcm_sf2_sw_fast_age_port(struct dsa_switch  
*ds, int port)
return 0;
 }
 
-static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct dsa_port *dp,
  struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
unsigned int i;
u32 reg, p_ctl;
 
-   priv->port_sts[port].bridge_dev = bridge;
-   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
+   priv->port_sts[dp->port].bridge_dev = bridge;
+   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
for (i = 0; i < priv->hw_params.num_ports; i++) {
if (priv->port_sts[i].bridge_dev != bridge)
@@ -509,7 +509,7 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
port,
 * membership and update the remote port bitmask
 */
reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-   reg |= 1 << port;
+   reg |= 1 << dp->port;
core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
priv->port_sts[i].vlan_ctl_mask = reg;
 
@@ -519,20 +519,20 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
port,
/* Configure the local port VLAN control membership to include
 * remote ports and update the local port bitmask
 */
-   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-   priv->port_sts[port].vlan_ctl_mask = p_ctl;
+   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
+   priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
 
return 0;
 }
 
-static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port,
+static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, struct dsa_port *dp,
struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
unsigned int i;
u32 reg, p_ctl;
 
-   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
+   p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
for (i = 0; i < priv->hw_params.num_ports; i++) {
/* Don't touch the remaining ports */
@@ -540,18 +540,18 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
int port,
continue;
 
reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-   reg &= ~(1 << port);
+   reg &= ~(1 << dp->port);
core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-   priv->port_sts[port].vlan_ctl_mask = reg;
+   priv->port_sts[dp->port].vlan_ctl_mask = reg;
 
/* Prevent self removal to preserve isolation */
-   if (port != i)
+   if (dp->port != i)
p_ctl &= ~(1 << i);
}
 
-   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-   priv->port_sts[port].vlan_ctl_mask = p_ctl;
-   priv->port_sts[port].bridge_dev = NULL;
+   core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
+   priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
+   priv->port_sts[dp->port].bridge_dev = NULL;
 }
 
 static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 86f8f2f..3f78c73 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2203,7 +2203,7 @@ unlock:
return err;
 }
 
-int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
   struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -2212,7 +2212,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int 
port,
mutex_lock(&ps->smi_mutex);
 
/* Assign the bridge and remap each port's VLANTable */
-   ps->ports[port].bridge_dev = bridge;
+   ps->ports[dp->port].bridge_dev = bridge;
 
for (i = 0; i < ps->info->num_ports; ++i) {
if (ps->ports[i].bridge_dev == bridge) {
@@ -2227,7 +2227,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int 
port,
return err;
 }
 
-void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
+void mv88e6xxx_

[RFC 02/20] net: dsa: be consistent with NETDEV_CHANGEUPPER

2016-04-27 Thread Vivien Didelot

Once NETDEV_CHANGEUPPER is emitted, the device is already (un)bridged.

If an error is returned on port_bridge_join, the bridge layer will
rollback the operation and unbridge the port.

Respect this by setting bridge_dev to NULL on error.

Also the DSA layer shouldn't assume that the drivers know about the
bridge device a port was previously bridged to. So pass the bridge
device to port_bridge_leave.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   |  4 ++--
 drivers/net/dsa/mv88e6xxx.c |  4 ++--
 drivers/net/dsa/mv88e6xxx.h |  3 ++-
 include/net/dsa.h   |  3 ++-
 net/dsa/slave.c | 13 +
 5 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 448deb5..f394ea9 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -525,10 +525,10 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int 
port,
return 0;
 }
 
-static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port)
+static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port,
+   struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   struct net_device *bridge = priv->port_sts[port].bridge_dev;
unsigned int i;
u32 reg, p_ctl;
 
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 028f92f..86f8f2f 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2227,10 +2227,10 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
int port,
return err;
 }
 
-void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port)
+void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
+struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct net_device *bridge = ps->ports[port].bridge_dev;
int i;
 
mutex_lock(&ps->smi_mutex);
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 0dbe2d1..2eb9a82 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -492,7 +492,8 @@ int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
  struct phy_device *phydev, struct ethtool_eee *e);
 int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
   struct net_device *bridge);
-void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port);
+void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
+struct net_device *bridge);
 void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state);
 int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
  bool vlan_filtering);
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 255c108..ed33500 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -305,7 +305,8 @@ struct dsa_switch_driver {
 */
int (*port_bridge_join)(struct dsa_switch *ds, int port,
struct net_device *bridge);
-   void(*port_bridge_leave)(struct dsa_switch *ds, int port);
+   void(*port_bridge_leave)(struct dsa_switch *ds, int port,
+struct net_device *bridge);
void(*port_stp_state_set)(struct dsa_switch *ds, int port,
  u8 state);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6115444..f2ec13d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -443,19 +443,24 @@ static int dsa_slave_bridge_port_join(struct net_device 
*dev,
if (ds->drv->port_bridge_join)
ret = ds->drv->port_bridge_join(ds, p->dp->port, br);
 
-   return ret == -EOPNOTSUPP ? 0 : ret;
+   if (ret && ret != -EOPNOTSUPP) {
+   p->bridge_dev = NULL;
+   return ret;
+   }
+
+   return 0;
 }
 
 static void dsa_slave_bridge_port_leave(struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
+   struct net_device *br = p->bridge_dev;
 
+   p->bridge_dev = NULL;
 
if (ds->drv->port_bridge_leave)
-   ds->drv->port_bridge_leave(ds, p->dp->port);
-
-   p->bridge_dev = NULL;
+   ds->drv->port_bridge_leave(ds, p->dp->port, br);
 
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
 * so allow it to be in BR_STATE_FORWARDING to be kept functional
-- 
2.8.0

[RFC 12/20] net: dsa: rename dst->ds to dst->switches

2016-04-27 Thread Vivien Didelot

dsa_switch stores the net_device pointers in a "ports" member. Be
consistent and store the dsa_switch pointer in a "switches" member of
the dsa_switch_tree structure.

This frees up the "ds" member for a future dsa_switch list.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h | 2 +-
 net/dsa/dsa.c | 8 
 net/dsa/tag_brcm.c| 2 +-
 net/dsa/tag_dsa.c | 2 +-
 net/dsa/tag_edsa.c| 2 +-
 net/dsa/tag_trailer.c | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 5f2e7df..389227d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -124,7 +124,7 @@ struct dsa_switch_tree {
/*
 * Data for the individual switch chips.
 */
-   struct dsa_switch   *ds[DSA_MAX_SWITCHES];
+   struct dsa_switch   *switches[DSA_MAX_SWITCHES];
 };
 
 struct dsa_port {
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 3daffb6..aa4a61a 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -857,7 +857,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
struct net_device *dev,
continue;
}
 
-   dst->ds[i] = ds;
+   dst->switches[i] = ds;
 
++configured;
}
@@ -953,7 +953,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
wmb();
 
for (i = 0; i < dst->pd->nr_chips; i++) {
-   struct dsa_switch *ds = dst->ds[i];
+   struct dsa_switch *ds = dst->switches[i];
 
if (ds)
dsa_switch_destroy(ds);
@@ -1006,7 +1006,7 @@ static int dsa_suspend(struct device *d)
int i, ret = 0;
 
for (i = 0; i < dst->pd->nr_chips; i++) {
-   struct dsa_switch *ds = dst->ds[i];
+   struct dsa_switch *ds = dst->switches[i];
 
if (ds != NULL)
ret = dsa_switch_suspend(ds);
@@ -1022,7 +1022,7 @@ static int dsa_resume(struct device *d)
int i, ret = 0;
 
for (i = 0; i < dst->pd->nr_chips; i++) {
-   struct dsa_switch *ds = dst->ds[i];
+   struct dsa_switch *ds = dst->switches[i];
 
if (ds != NULL)
ret = dsa_switch_resume(ds);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 3d5aabc..35fc75b 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -102,7 +102,7 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct 
net_device *dev,
if (unlikely(dst == NULL))
goto out_drop;
 
-   ds = dst->ds[0];
+   ds = dst->switches[0];
 
skb = skb_unshare(skb, GFP_ATOMIC);
if (skb == NULL)
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index c870cfa..bf3eebf8 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -109,7 +109,7 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device 
*dev,
 */
if (source_device >= dst->pd->nr_chips)
goto out_drop;
-   ds = dst->ds[source_device];
+   ds = dst->switches[source_device];
if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
goto out_drop;
 
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 898f949d..4ddbb85 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -122,7 +122,7 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device 
*dev,
 */
if (source_device >= dst->pd->nr_chips)
goto out_drop;
-   ds = dst->ds[source_device];
+   ds = dst->switches[source_device];
if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
goto out_drop;
 
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index eaa3440..ade0bbf 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -67,7 +67,7 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device 
*dev,
 
if (unlikely(dst == NULL))
goto out_drop;
-   ds = dst->ds[0];
+   ds = dst->switches[0];
 
skb = skb_unshare(skb, GFP_ATOMIC);
if (skb == NULL)
-- 
2.8.0

[RFC 07/20] net: dsa: list ports in switch

2016-04-27 Thread Vivien Didelot

List DSA port structures in their switch structure, so that drivers can
iterate on them to retrieve information such as their ports membership.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h | 9 +
 net/dsa/dsa.c | 4 
 2 files changed, 13 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 69e467c..5f2e7df 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -32,6 +32,11 @@ enum dsa_tag_protocol {
 #define DSA_MAX_SWITCHES   4
 #define DSA_MAX_PORTS  12
 
+#define dsa_switch_for_each_port(_ds, _dp, _num_ports) \
+   for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list);  \
+&_dp->list != (&_ds->dp) && _dp->port < _num_ports;\
+_dp = list_next_entry(_dp, list))
+
 struct dsa_chip_data {
/*
 * How to access the switch configuration registers.
@@ -123,6 +128,8 @@ struct dsa_switch_tree {
 };
 
 struct dsa_port {
+   struct list_headlist;
+
struct dsa_switch   *ds;
int port;
 
@@ -173,6 +180,8 @@ struct dsa_switch {
u32 phys_mii_mask;
struct mii_bus  *slave_mii_bus;
struct net_device   *ports[DSA_MAX_PORTS];
+
+   struct list_headdp;
 };
 
 static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 222494c..3daffb6 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -225,6 +225,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
int index = ds->index;
int i, ret;
 
+   INIT_LIST_HEAD(&ds->dp);
+
/*
 * Validate supplied switch configuration.
 */
@@ -238,6 +240,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, 
struct device *parent)
dp[i]->ds = ds;
dp[i]->port = i;
 
+   list_add_tail(&dp[i]->list, &ds->dp);
+
name = pd->port_names[i];
if (name == NULL)
continue;
-- 
2.8.0

[RFC 10/20] net: dsa: mv88e6xxx: setup a dsa_port

2016-04-27 Thread Vivien Didelot

Change the mv88e6xxx_setup_port function to take a dsa_port structure as
parameter instead of a port index. This will help us get rid of the
private bridge_dev pointer.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 64 -
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 00a0b92..0687894 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2317,7 +2317,7 @@ static int mv88e6xxx_power_on_serdes(struct dsa_switch 
*ds)
return ret;
 }
 
-static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
+static int mv88e6xxx_setup_port(struct dsa_switch *ds, struct dsa_port *dp)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
int ret;
@@ -2335,8 +2335,10 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
 * and all DSA ports to their maximum bandwidth and
 * full duplex.
 */
-   reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_PCS_CTRL);
-   if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
+   reg = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port),
+ PORT_PCS_CTRL);
+   if (dsa_is_cpu_port(ds, dp->port) ||
+   dsa_is_dsa_port(ds, dp->port)) {
reg &= ~PORT_PCS_CTRL_UNFORCED;
reg |= PORT_PCS_CTRL_FORCE_LINK |
PORT_PCS_CTRL_LINK_UP |
@@ -2350,7 +2352,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg |= PORT_PCS_CTRL_UNFORCED;
}
 
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port),
   PORT_PCS_CTRL, reg);
if (ret)
goto abort;
@@ -2378,7 +2380,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg = PORT_CONTROL_IGMP_MLD_SNOOP |
PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP |
PORT_CONTROL_STATE_FORWARDING;
-   if (dsa_is_cpu_port(ds, port)) {
+   if (dsa_is_cpu_port(ds, dp->port)) {
if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds))
reg |= PORT_CONTROL_DSA_TAG;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
@@ -2400,7 +2402,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg |= PORT_CONTROL_EGRESS_ADD_TAG;
}
}
-   if (dsa_is_dsa_port(ds, port)) {
+   if (dsa_is_dsa_port(ds, dp->port)) {
if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds))
reg |= PORT_CONTROL_DSA_TAG;
if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
@@ -2409,13 +2411,13 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
reg |= PORT_CONTROL_FRAME_MODE_DSA;
}
 
-   if (port == dsa_upstream_port(ds))
+   if (dp->port == dsa_upstream_port(ds))
reg |= PORT_CONTROL_FORWARD_UNKNOWN |
PORT_CONTROL_FORWARD_UNKNOWN_MC;
}
if (reg) {
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
-  PORT_CONTROL, reg);
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port), PORT_CONTROL,
+  reg);
if (ret)
goto abort;
}
@@ -2424,7 +2426,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
 * powered down.
 */
if (mv88e6xxx_6352_family(ds)) {
-   ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_STATUS);
+   ret = _mv88e6xxx_reg_read(ds, REG_PORT(dp->port), PORT_STATUS);
if (ret < 0)
goto abort;
ret &= PORT_STATUS_CMODE_MASK;
@@ -2460,14 +2462,14 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
int port)
/* enable forwarding of unknown multicast addresses to
 * the upstream port
 */
-   if (port == dsa_upstream_port(ds))
+   if (dp->port == dsa_upstream_port(ds))
reg |= PORT_CONTROL_2_FORWARD_UNKNOWN;
}
 
reg |= PORT_CONTROL_2_8021Q_DISABLED;
 
if (reg) {
-   ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
+   ret = _mv88e6xxx_reg_write(ds, REG_PORT(dp->port),
   PORT_CONTROL_2, reg);
if (ret)
goto abort;
@@ -2478,17 +2480,18 @@ static int mv88e6xxx_set

[RFC 08/20] net: dsa: bcm_sf2: use bridge device from dsa_port

2016-04-27 Thread Vivien Didelot

Now that the DSA layer exposes the DSA port structures to drivers, use
that to retrieve the port bridge membership and thus get rid of the
private bridge_dev pointer.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c | 30 ++
 drivers/net/dsa/bcm_sf2.h |  2 --
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index f7b53fa..6e3b844 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -495,25 +495,24 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, 
struct dsa_port *dp,
  struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   unsigned int i;
+   struct dsa_port *intp;
u32 reg, p_ctl;
 
-   priv->port_sts[dp->port].bridge_dev = bridge;
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
-   for (i = 0; i < priv->hw_params.num_ports; i++) {
-   if (priv->port_sts[i].bridge_dev != bridge)
+   dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
+   if (intp->br != bridge)
continue;
 
/* Add this local port to the remote port VLAN control
 * membership and update the remote port bitmask
 */
-   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
+   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(intp->port));
reg |= 1 << dp->port;
-   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-   priv->port_sts[i].vlan_ctl_mask = reg;
+   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(intp->port));
+   priv->port_sts[intp->port].vlan_ctl_mask = reg;
 
-   p_ctl |= 1 << i;
+   p_ctl |= 1 << intp->port;
}
 
/* Configure the local port VLAN control membership to include
@@ -529,29 +528,28 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
struct net_device *bridge)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   unsigned int i;
+   struct dsa_port *intp;
u32 reg, p_ctl;
 
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
-   for (i = 0; i < priv->hw_params.num_ports; i++) {
+   dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
/* Don't touch the remaining ports */
-   if (priv->port_sts[i].bridge_dev != bridge)
+   if (intp->br != bridge)
continue;
 
-   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
+   reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(intp->port));
reg &= ~(1 << dp->port);
-   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
+   core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(intp->port));
priv->port_sts[dp->port].vlan_ctl_mask = reg;
 
/* Prevent self removal to preserve isolation */
-   if (dp->port != i)
-   p_ctl &= ~(1 << i);
+   if (dp != intp)
+   p_ctl &= ~(1 << intp->port);
}
 
core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(dp->port));
priv->port_sts[dp->port].vlan_ctl_mask = p_ctl;
-   priv->port_sts[dp->port].bridge_dev = NULL;
 }
 
 static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 200b1f5..6bba1c9 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -50,8 +50,6 @@ struct bcm_sf2_port_status {
struct ethtool_eee eee;
 
u32 vlan_ctl_mask;
-
-   struct net_device *bridge_dev;
 };
 
 struct bcm_sf2_arl_entry {
-- 
2.8.0

[RFC 11/20] net: dsa: mv88e6xxx: use bridge from dsa_port

2016-04-27 Thread Vivien Didelot

Change the _mv88e6xxx_port_based_vlan_map function for a
_mv88e6xxx_port_map_vlantable which takes a dsa_port structure as
parameter. This allows us to iterate on dsa_port's bridge device pointer
and thus get rid of the private bridge_dev structure.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 48 ++---
 drivers/net/dsa/mv88e6xxx.h |  1 -
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 0687894..89d0206 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -,27 +,29 @@ static int _mv88e6xxx_port_state(struct dsa_switch *ds, 
int port, u8 state)
return ret;
 }
 
-static int _mv88e6xxx_port_based_vlan_map(struct dsa_switch *ds, int port)
+static int _mv88e6xxx_port_map_vlantable(struct dsa_switch *ds,
+struct dsa_port *dp)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct net_device *bridge = ps->ports[port].bridge_dev;
const u16 mask = (1 << ps->info->num_ports) - 1;
u16 output_ports = 0;
+   int port = dp->port;
+   struct dsa_port *intp;
int reg;
-   int i;
 
/* allow CPU port or DSA link(s) to send frames to every port */
if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
output_ports = mask;
} else {
-   for (i = 0; i < ps->info->num_ports; ++i) {
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
/* allow sending frames to every group member */
-   if (bridge && ps->ports[i].bridge_dev == bridge)
-   output_ports |= BIT(i);
+   if (intp->br && intp->br == dp->br)
+   output_ports |= BIT(intp->port);
 
/* allow sending frames to CPU port and DSA link(s) */
-   if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i))
-   output_ports |= BIT(i);
+   if (dsa_is_cpu_port(ds, intp->port) ||
+   dsa_is_dsa_port(ds, intp->port))
+   output_ports |= BIT(intp->port);
}
}
 
@@ -2207,16 +2209,15 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
struct dsa_port *dp,
   struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   int i, err;
+   struct dsa_port *intp;
+   int err;
 
mutex_lock(&ps->smi_mutex);
 
-   /* Assign the bridge and remap each port's VLANTable */
-   ps->ports[dp->port].bridge_dev = bridge;
-
-   for (i = 0; i < ps->info->num_ports; ++i) {
-   if (ps->ports[i].bridge_dev == bridge) {
-   err = _mv88e6xxx_port_based_vlan_map(ds, i);
+   /* Remap each port's VLANTable */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
+   if (intp->br == bridge) {
+   err = _mv88e6xxx_port_map_vlantable(ds, intp);
if (err)
break;
}
@@ -2231,17 +2232,16 @@ void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
 struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   int i;
+   struct dsa_port *intp;
 
mutex_lock(&ps->smi_mutex);
 
-   /* Unassign the bridge and remap each port's VLANTable */
-   ps->ports[dp->port].bridge_dev = NULL;
-
-   for (i = 0; i < ps->info->num_ports; ++i)
-   if (i == dp->port || ps->ports[i].bridge_dev == bridge)
-   if (_mv88e6xxx_port_based_vlan_map(ds, i))
-   netdev_warn(ds->ports[i], "failed to remap\n");
+   /* Remap each port's VLANTable */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
+   if (intp == dp || intp->br == bridge)
+   if (_mv88e6xxx_port_map_vlantable(ds, intp))
+   netdev_warn(ds->ports[intp->port],
+   "failed to remap\n");
 
mutex_unlock(&ps->smi_mutex);
 }
@@ -2573,7 +2573,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, 
struct dsa_port *dp)
if (ret)
goto abort;
 
-   ret = _mv88e6xxx_port_based_vlan_map(ds, dp->port);
+   ret = _mv88e6xxx_port_map_vlantable(ds, dp);
if (ret)
goto abort;
 
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index c49a514..56e3347 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -378,7 +378,6 @@ struct mv88e6xxx_vtu_stu_entry {
 };
 
 struct mv88e6xxx_priv_port {
-   struct net_device *bridge_dev;
u8 state

[RFC 06/20] net: dsa: move bridge device in dsa_port

2016-04-27 Thread Vivien Didelot

Move the pointer to the bridge device in the DSA port structure instead
of cluttering the dsa_slave_priv structure.

This can later be used by drivers to help them configure their bridge
group port membership.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h  |  2 ++
 net/dsa/dsa_priv.h |  1 -
 net/dsa/slave.c| 16 +---
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 08a9536..69e467c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -125,6 +125,8 @@ struct dsa_switch_tree {
 struct dsa_port {
struct dsa_switch   *ds;
int port;
+
+   struct net_device   *br;
 };
 
 struct dsa_switch {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c7d5df0..c5afddd 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -35,7 +35,6 @@ struct dsa_slave_priv {
int old_pause;
int old_duplex;
 
-   struct net_device   *bridge_dev;
 #ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll  *netpoll;
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d6b6019..b90caf8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -64,18 +64,12 @@ static int dsa_slave_get_iflink(const struct net_device 
*dev)
return p->dp->ds->dst->master_netdev->ifindex;
 }
 
-static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p)
-{
-   return !!p->bridge_dev;
-}
-
 static int dsa_slave_open(struct net_device *dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
struct net_device *master = ds->dst->master_netdev;
-   u8 stp_state = dsa_port_is_bridged(p) ?
-   BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+   u8 stp_state = p->dp->br ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
int err;
 
if (!(master->flags & IFF_UP))
@@ -438,13 +432,13 @@ static int dsa_slave_bridge_port_join(struct net_device 
*dev,
struct dsa_switch *ds = p->dp->ds;
int ret = -EOPNOTSUPP;
 
-   p->bridge_dev = br;
+   p->dp->br = br;
 
if (ds->drv->port_bridge_join)
ret = ds->drv->port_bridge_join(ds, p->dp, br);
 
if (ret && ret != -EOPNOTSUPP) {
-   p->bridge_dev = NULL;
+   p->dp->br = NULL;
return ret;
}
 
@@ -455,9 +449,9 @@ static void dsa_slave_bridge_port_leave(struct net_device 
*dev)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
-   struct net_device *br = p->bridge_dev;
+   struct net_device *br = p->dp->br;
 
-   p->bridge_dev = NULL;
+   p->dp->br = NULL;
 
if (ds->drv->port_bridge_leave)
ds->drv->port_bridge_leave(ds, p->dp, br);
-- 
2.8.0

[RFC 09/20] net: dsa: mv88e6xxx: check HW vlan with dsa_port

2016-04-27 Thread Vivien Didelot

Change the mv88e6xxx_port_check_hw_vlan function for a
mv88e6xxx_port_check_vtu which takes a dsa_port structure as parameter.
This will help us get rid of the bridge_dev pointer.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 7e03f4c..00a0b92 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1729,12 +1729,13 @@ static int _mv88e6xxx_vtu_get(struct dsa_switch *ds, 
u16 vid,
return err;
 }
 
-static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
-   u16 vid_begin, u16 vid_end)
+static int mv88e6xxx_port_check_vtu(struct dsa_switch *ds, struct dsa_port *dp,
+   u16 vid_begin, u16 vid_end)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
struct mv88e6xxx_vtu_stu_entry vlan;
-   int i, err;
+   struct dsa_port *intp;
+   int err;
 
if (!vid_begin)
return -EOPNOTSUPP;
@@ -1756,22 +1757,21 @@ static int mv88e6xxx_port_check_hw_vlan(struct 
dsa_switch *ds, int port,
if (vlan.vid > vid_end)
break;
 
-   for (i = 0; i < ps->info->num_ports; ++i) {
-   if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i))
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
+   if (dsa_is_dsa_port(ds, intp->port) ||
+   dsa_is_cpu_port(ds, intp->port))
continue;
 
-   if (vlan.data[i] ==
+   if (vlan.data[intp->port] ==
GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER)
continue;
 
-   if (ps->ports[i].bridge_dev ==
-   ps->ports[port].bridge_dev)
+   if (intp->br == dp->br)
break; /* same bridge, check next VLAN */
 
-   netdev_warn(ds->ports[port],
+   netdev_warn(ds->ports[dp->port],
"hardware VLAN %d already used by %s\n",
-   vlan.vid,
-   netdev_name(ps->ports[i].bridge_dev));
+   vlan.vid, netdev_name(intp->br));
err = -EOPNOTSUPP;
goto unlock;
}
@@ -1836,8 +1836,7 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
/* If the requested port doesn't belong to the same bridge as the VLAN
 * members, do not support it (yet) and fallback to software VLAN.
 */
-   err = mv88e6xxx_port_check_hw_vlan(ds, dp->port, vlan->vid_begin,
-  vlan->vid_end);
+   err = mv88e6xxx_port_check_vtu(ds, dp, vlan->vid_begin, vlan->vid_end);
if (err)
return err;
 
-- 
2.8.0

[RFC 20/20] net: dsa: mv88e6xxx: setup PVT on cross-chip ops

2016-04-27 Thread Vivien Didelot

Switches with a Cross-chip Port VLAN Table are currently configured to
allow cross-chip frames to egress any internal ports. This means that
unbridged cross-chip ports can actually talk to each other, and this is
not what we want.

In order to restrict that, we need to setup the PVT entry for an
external port when it joins or leaves a bridge group crossing the switch.

Also initialize the PVT to forbid egressing of cross-chip frames to
internal user ports by default.

Note that a PVT-less switch cannot forbid such frames to egress its
internal ports, unless the kernel supports VLAN filtering. In such
systems, a bridge group is also implemented as a 802.1Q VLAN and thus a
global VTU-based logic can be used to correctly implement cross-chip
hardware bridging. Warn the user if the setup doesn't respect this.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 98 +++--
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 4341ffd..e0f9e93 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2272,8 +2272,29 @@ static int _mv88e6xxx_pvt_cmd(struct dsa_switch *ds, int 
src_dev, int src_port,
return _mv88e6xxx_pvt_wait(ds);
 }
 
+static int _mv88e6xxx_pvt_write(struct dsa_switch *ds, int src_dev,
+   int src_port, u16 data)
+{
+   int err;
+
+   err = _mv88e6xxx_pvt_wait(ds);
+   if (err)
+   return err;
+
+   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PVT_DATA, data);
+   if (err)
+   return err;
+
+return _mv88e6xxx_pvt_cmd(ds, src_dev, src_port,
+ GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN);
+}
+
 static int _mv88e6xxx_pvt_init(struct dsa_switch *ds)
 {
+   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+   struct dsa_port *intp;
+   int src_dev, src_port;
+   u16 pv = 0;
int err;
 
/* Clear 5 Bit Port for usage with Marvell Link Street devices:
@@ -2284,8 +2305,60 @@ static int _mv88e6xxx_pvt_init(struct dsa_switch *ds)
if (err)
return err;
 
-   /* Allow any cross-chip frames to egress any internal ports */
-   return _mv88e6xxx_pvt_cmd(ds, 0, 0, GLOBAL2_PVT_ADDR_OP_INIT_ONES);
+   /* Forbid cross-chip frames to egress internal ports */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
+   if (dsa_is_cpu_port(ds, intp->port) ||
+   dsa_is_dsa_port(ds, intp->port))
+   pv |= BIT(intp->port);
+
+   for (src_dev = 0; src_dev < 32; ++src_dev) {
+   for (src_port = 0; src_port < 16; ++src_port) {
+   err = _mv88e6xxx_pvt_write(ds, src_dev, src_port, pv);
+   if (err)
+   return err;
+   }
+   }
+
+   return 0;
+}
+
+static int _mv88e6xxx_port_map_pvt(struct dsa_switch *ds, struct dsa_port *dp)
+{
+   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+   struct dsa_port *intp;
+   u16 pvlan = 0;
+
+   /* Cross-chip frames can egress CPU and DSA ports, and bridge members */
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
+   if (dsa_is_cpu_port(ds, intp->port) ||
+   dsa_is_dsa_port(ds, intp->port) ||
+   (intp->br && intp->br == dp->br))
+   pvlan |= BIT(intp->port);
+
+   return _mv88e6xxx_pvt_write(ds, dp->ds->index, dp->port, pvlan);
+}
+
+static int _mv88e6xxx_remap_pvt(struct dsa_switch *ds,
+   struct net_device *bridge)
+{
+   struct dsa_switch *dsa_sw;
+   struct dsa_port *dsa_p;
+   int err;
+
+   dsa_tree_for_each_switch(ds->dst, dsa_sw) {
+   if (dsa_sw == ds)
+   continue;
+
+   dsa_switch_for_each_port(dsa_sw, dsa_p, DSA_MAX_PORTS) {
+   if (dsa_p->br == bridge) {
+   err = _mv88e6xxx_port_map_pvt(ds, dsa_p);
+   if (err)
+   return err;
+   }
+   }
+   }
+
+   return 0;
 }
 
 int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp,
@@ -2297,7 +2370,19 @@ int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, 
struct dsa_port *dp,
mutex_lock(&ps->smi_mutex);
 
if (dsa_port_is_external(dp, ds)) {
-   err = -EOPNOTSUPP;
+   /* Forbidding hardware bridging of cross-chip frames requires a
+* Cross-chip Port VLAN Table (PVT), unless VLAN filtering is
+* enabled, in which case a global VTU-based logic works.
+*/
+   if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PVT)) {
+   err = _mv88e6xxx_port_map_pvt(ds, dp);
+   } else if (IS_ENABL

[RFC 19/20] net: dsa: mv88e6xxx: conditionally init PVT

2016-04-27 Thread Vivien Didelot

The current code initializes the Cross-chip Port VLAN Table to all ones,
even though the switch model doesn't have one.

It also assumes that the switch is configured to support up to
32-switch/16-port cross-chip devices.

Implement the access to the PVT and initialize it only if the switch has
such a feature. Support only 88E6352 for the moment.

This commit brings no functional change for devices with a PVT.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6352.c |  1 +
 drivers/net/dsa/mv88e6xxx.c | 54 +++--
 drivers/net/dsa/mv88e6xxx.h |  6 +
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/mv88e6352.c b/drivers/net/dsa/mv88e6352.c
index 4afc24d..29d9fd76 100644
--- a/drivers/net/dsa/mv88e6352.c
+++ b/drivers/net/dsa/mv88e6352.c
@@ -59,6 +59,7 @@ static const struct mv88e6xxx_info mv88e6352_table[] = {
.name = "Marvell 88E6352",
.num_databases = 4096,
.num_ports = 7,
+   .flags = BIT(MV88E6XXX_FLAG_PVT),
}
 };
 
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 25852ee..4341ffd 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2247,6 +2247,47 @@ unlock:
return err;
 }
 
+static int _mv88e6xxx_pvt_wait(struct dsa_switch *ds)
+{
+   return _mv88e6xxx_wait(ds, REG_GLOBAL2, GLOBAL2_PVT_ADDR,
+  GLOBAL2_PVT_ADDR_BUSY);
+}
+
+static int _mv88e6xxx_pvt_cmd(struct dsa_switch *ds, int src_dev, int src_port,
+ u16 op)
+{
+   u16 reg = op;
+   int err;
+
+   /* 9-bit Cross-chip PVT pointer: with GLOBAL2_MISC_5_BIT_PORT cleared,
+* source device is 5-bit, source port is 4-bit.
+*/
+   reg |= (src_dev & 0x1f) << 4;
+   reg |= (src_port & 0xf);
+
+   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_PVT_ADDR, reg);
+   if (err)
+   return err;
+
+   return _mv88e6xxx_pvt_wait(ds);
+}
+
+static int _mv88e6xxx_pvt_init(struct dsa_switch *ds)
+{
+   int err;
+
+   /* Clear 5 Bit Port for usage with Marvell Link Street devices:
+* use 4 bits for the Src_Port/Src_Trunk and 5 bits for the Src_Dev.
+*/
+   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2, GLOBAL2_MISC,
+  0 & ~GLOBAL2_MISC_5_BIT_PORT);
+   if (err)
+   return err;
+
+   /* Allow any cross-chip frames to egress any internal ports */
+   return _mv88e6xxx_pvt_cmd(ds, 0, 0, GLOBAL2_PVT_ADDR_OP_INIT_ONES);
+}
+
 int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp,
 struct net_device *bridge)
 {
@@ -2770,13 +2811,12 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
if (err)
goto unlock;
 
-   /* Initialise cross-chip port VLAN table to reset
-* defaults.
-*/
-   err = _mv88e6xxx_reg_write(ds, REG_GLOBAL2,
-  GLOBAL2_PVT_ADDR, 0x9000);
-   if (err)
-   goto unlock;
+   /* Initialize Cross-chip Port VLAN Table (PVT) */
+   if (mv88e6xxx_has(ps, MV88E6XXX_FLAG_PVT)) {
+   err = _mv88e6xxx_pvt_init(ds);
+   if (err)
+   goto unlock;
+   }
 
/* Clear the priority override table. */
for (i = 0; i < 16; i++) {
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 325caf8..fbde8b4 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -298,6 +298,10 @@
 #define GLOBAL2_INGRESS_OP 0x09
 #define GLOBAL2_INGRESS_DATA   0x0a
 #define GLOBAL2_PVT_ADDR   0x0b
+#define GLOBAL2_PVT_ADDR_BUSY  BIT(15)
+#define GLOBAL2_PVT_ADDR_OP_INIT_ONES  ((0x01 << 12) | GLOBAL2_PVT_ADDR_BUSY)
+#define GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN((0x03 << 12) | 
GLOBAL2_PVT_ADDR_BUSY)
+#define GLOBAL2_PVT_ADDR_OP_READ   ((0x04 << 12) | GLOBAL2_PVT_ADDR_BUSY)
 #define GLOBAL2_PVT_DATA   0x0c
 #define GLOBAL2_SWITCH_MAC 0x0d
 #define GLOBAL2_SWITCH_MAC_BUSY BIT(15)
@@ -335,10 +339,12 @@
 #define GLOBAL2_WDOG_CONTROL   0x1b
 #define GLOBAL2_QOS_WEIGHT 0x1c
 #define GLOBAL2_MISC   0x1d
+#define GLOBAL2_MISC_5_BIT_PORTBIT(14)
 
 #define MV88E6XXX_N_FID4096
 
 enum mv88e6xxx_flag {
+   MV88E6XXX_FLAG_PVT,
MV88E6XXX_NUM_FLAGS,
 };
 
-- 
2.8.0

[RFC 18/20] net: dsa: mv88e6xxx: add flags to info

2016-04-27 Thread Vivien Didelot

Add a flags bitmap to the mv88e6xxx_info structure to help describe
features supported or not by a switch model.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index 56e3347..325caf8 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -338,6 +338,10 @@
 
 #define MV88E6XXX_N_FID4096
 
+enum mv88e6xxx_flag {
+   MV88E6XXX_NUM_FLAGS,
+};
+
 enum mv88e6xxx_family {
MV88E6XXX_FAMILY_NONE,
MV88E6XXX_FAMILY_6065,  /* 6031 6035 6061 6065 */
@@ -356,6 +360,7 @@ struct mv88e6xxx_info {
const char *name;
unsigned int num_databases;
unsigned int num_ports;
+   unsigned long flags;
 };
 
 struct mv88e6xxx_atu_entry {
@@ -445,6 +450,12 @@ struct mv88e6xxx_hw_stat {
enum stat_type type;
 };
 
+static inline bool mv88e6xxx_has(struct mv88e6xxx_priv_state *ps,
+enum mv88e6xxx_flag flag)
+{
+   return !!(ps->info->flags & BIT(flag));
+}
+
 int mv88e6xxx_switch_reset(struct dsa_switch *ds, bool ppu_active);
 const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device 
*host_dev,
int sw_addr, void **priv,
-- 
2.8.0

[RFC 17/20] net: dsa: mv88e6xxx: factorize port bridge change

2016-04-27 Thread Vivien Didelot

Implement a mv88e6xxx_port_bridge_change function to factorize the
configuration needed when a port joins or leaves a bridge group.

This will simplify the implementation of cross-chip bridging.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 67 +++--
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 8004d00..25852ee 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1150,6 +1150,24 @@ static int _mv88e6xxx_port_map_vlantable(struct 
dsa_switch *ds,
return _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_BASE_VLAN, reg);
 }
 
+static int _mv88e6xxx_remap_vlantable(struct dsa_switch *ds,
+ struct net_device *bridge)
+{
+   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+   struct dsa_port *intp;
+   int err;
+
+   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
+   if (intp->br == bridge) {
+   err = _mv88e6xxx_port_map_vlantable(ds, intp);
+   if (err)
+   return err;
+   }
+   }
+
+   return 0;
+}
+
 void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -2229,51 +2247,46 @@ unlock:
return err;
 }
 
-int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
-  struct net_device *bridge)
+int mv88e6xxx_port_bridge_change(struct dsa_switch *ds, struct dsa_port *dp,
+struct net_device *bridge)
 {
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct dsa_port *intp;
int err;
 
-   if (dsa_port_is_external(dp, ds))
-   return -EOPNOTSUPP;
-
mutex_lock(&ps->smi_mutex);
 
-   /* Remap each port's VLANTable */
-   dsa_switch_for_each_port(ds, intp, ps->info->num_ports) {
-   if (intp->br == bridge) {
-   err = _mv88e6xxx_port_map_vlantable(ds, intp);
+   if (dsa_port_is_external(dp, ds)) {
+   err = -EOPNOTSUPP;
+   } else {
+   /* Remap VLANTable of concerned in-chip ports */
+   if (!dp->br) {
+   err = _mv88e6xxx_port_map_vlantable(ds, dp);
if (err)
-   break;
+   goto unlock;
}
+
+   err = _mv88e6xxx_remap_vlantable(ds, bridge);
+   if (err)
+   goto unlock;
}
 
+unlock:
mutex_unlock(&ps->smi_mutex);
 
return err;
 }
 
+int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, struct dsa_port *dp,
+  struct net_device *bridge)
+{
+   return mv88e6xxx_port_bridge_change(ds, dp, bridge);
+}
+
 void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, struct dsa_port *dp,
 struct net_device *bridge)
 {
-   struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
-   struct dsa_port *intp;
-
-   if (dsa_port_is_external(dp, ds))
-   return;
-
-   mutex_lock(&ps->smi_mutex);
-
-   /* Remap each port's VLANTable */
-   dsa_switch_for_each_port(ds, intp, ps->info->num_ports)
-   if (intp == dp || intp->br == bridge)
-   if (_mv88e6xxx_port_map_vlantable(ds, intp))
-   netdev_warn(ds->ports[intp->port],
-   "failed to remap\n");
-
-   mutex_unlock(&ps->smi_mutex);
+   if (mv88e6xxx_port_bridge_change(ds, dp, bridge))
+   netdev_err(ds->ports[dp->port], "failed to unbridge\n");
 }
 
 static void mv88e6xxx_bridge_work(struct work_struct *work)
-- 
2.8.0

[RFC 14/20] net: dsa: add tree-wide bridge ops

2016-04-27 Thread Vivien Didelot

In order to support cross-chip operations, we need to inform each switch
driver when a port operation occurs in a DSA tree.

This allows drivers to configure the cross-chip port-based VLAN table, VTU
or FDB entries on DSA links, in order to implement correct hardware
switching of frames.

Add a new tree.c file to implement tree-wide operations, propagating a
port-based operation on each switch of a tree.

Implement tree-wide bridge operations.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   |  6 +
 drivers/net/dsa/mv88e6xxx.c |  6 +
 include/net/dsa.h   |  6 +
 net/dsa/Makefile|  2 +-
 net/dsa/dsa_priv.h  |  6 +
 net/dsa/slave.c | 46 ---
 net/dsa/tree.c  | 66 +
 7 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 net/dsa/tree.c

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 6e3b844..0a91ea9 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -498,6 +498,9 @@ static int bcm_sf2_sw_br_join(struct dsa_switch *ds, struct 
dsa_port *dp,
struct dsa_port *intp;
u32 reg, p_ctl;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
@@ -531,6 +534,9 @@ static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
struct dsa_port *intp;
u32 reg, p_ctl;
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(dp->port));
 
dsa_switch_for_each_port(ds, intp, priv->hw_params.num_ports) {
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 89d0206..6fef29b 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2212,6 +2212,9 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, 
struct dsa_port *dp,
struct dsa_port *intp;
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(&ps->smi_mutex);
 
/* Remap each port's VLANTable */
@@ -2234,6 +2237,9 @@ void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, 
struct dsa_port *dp,
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
struct dsa_port *intp;
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
mutex_lock(&ps->smi_mutex);
 
/* Remap each port's VLANTable */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 85fac8a..33172c9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -193,6 +193,12 @@ struct dsa_switch {
struct list_headdp;
 };
 
+static inline bool dsa_port_is_external(struct dsa_port *dp,
+   struct dsa_switch *ds)
+{
+   return dp->ds != ds;
+}
+
 static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
 {
return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index da06ed1..bf8d12c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
 # the core
 obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o
+dsa_core-y += dsa.o tree.o slave.o
 
 # tagging formats
 dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c5afddd..6e08b3d 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -46,6 +46,12 @@ struct dsa_slave_priv {
 /* dsa.c */
 extern char dsa_driver_version[];
 
+/* tree.c */
+int dsa_tree_bridge_port_join(struct dsa_switch_tree *dst, struct dsa_port *dp,
+ struct net_device *br);
+void dsa_tree_bridge_port_leave(struct dsa_switch_tree *dst,
+   struct dsa_port *dp, struct net_device *br);
+
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b90caf8..7123ae2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -425,45 +425,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
return err;
 }
 
-static int dsa_slave_bridge_port_join(struct net_device *dev,
- struct net_device *br)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-   int ret = -EOPNOTSUPP;
-
-   p->dp->br = br;
-
-   if (ds->drv->port_bridge_join)
-   ret = ds->drv->port_bridge_join(ds, p->dp, br);
-
-   if (ret && ret != -EOPNOTSUPP) {
-   p->dp->br = NULL;
-   return ret;
-   }
-
-   return 0;
-}
-
-static void dsa_slave_bridge_port_leave(struct net_device *dev)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   st

[RFC 16/20] net: dsa: add tree-wide VLAN ops

2016-04-27 Thread Vivien Didelot

In order to support cross-chip operations, we need to inform each switch
driver when a port operation occurs in a DSA tree.

Implement tree-wide VLAN operations.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/mv88e6xxx.c | 12 +
 net/dsa/dsa_priv.h  |  8 ++
 net/dsa/slave.c | 59 ++--
 net/dsa/tree.c  | 60 +
 4 files changed, 87 insertions(+), 52 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 7d29de3..8004d00 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1378,6 +1378,9 @@ int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, 
struct dsa_port *dp,
u16 pvid;
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(&ps->smi_mutex);
 
err = _mv88e6xxx_port_pvid_get(ds, dp->port, &pvid);
@@ -1835,6 +1838,9 @@ int mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
 {
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
/* If the requested port doesn't belong to the same bridge as the VLAN
 * members, do not support it (yet) and fallback to software VLAN.
 */
@@ -1874,6 +1880,9 @@ void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, 
struct dsa_port *dp,
bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
u16 vid;
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
mutex_lock(&ps->smi_mutex);
 
for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid)
@@ -1930,6 +1939,9 @@ int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, struct 
dsa_port *dp,
u16 pvid, vid;
int err = 0;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(&ps->smi_mutex);
 
err = _mv88e6xxx_port_pvid_get(ds, dp->port, &pvid);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index e8765c3..d743d6a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -60,6 +60,14 @@ int dsa_tree_port_fdb_del(struct dsa_switch_tree *dst, 
struct dsa_port *dp,
 int dsa_tree_port_fdb_dump(struct dsa_switch_tree *dst, struct dsa_port *dp,
   struct switchdev_obj_port_fdb *fdb,
   switchdev_obj_dump_cb_t *cb);
+int dsa_tree_port_vlan_add(struct dsa_switch_tree *dst, struct dsa_port *dp,
+  const struct switchdev_obj_port_vlan *vlan,
+  struct switchdev_trans *trans);
+int dsa_tree_port_vlan_del(struct dsa_switch_tree *dst, struct dsa_port *dp,
+  const struct switchdev_obj_port_vlan *vlan);
+int dsa_tree_port_vlan_dump(struct dsa_switch_tree *dst, struct dsa_port *dp,
+   struct switchdev_obj_port_vlan *vlan,
+   switchdev_obj_dump_cb_t *cb);
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 90bcf8a..19469dc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -195,50 +195,6 @@ out:
return 0;
 }
 
-static int dsa_slave_port_vlan_add(struct net_device *dev,
-  const struct switchdev_obj_port_vlan *vlan,
-  struct switchdev_trans *trans)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-
-   if (switchdev_trans_ph_prepare(trans)) {
-   if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
-   return -EOPNOTSUPP;
-
-   return ds->drv->port_vlan_prepare(ds, p->dp, vlan, trans);
-   }
-
-   ds->drv->port_vlan_add(ds, p->dp, vlan, trans);
-
-   return 0;
-}
-
-static int dsa_slave_port_vlan_del(struct net_device *dev,
-  const struct switchdev_obj_port_vlan *vlan)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-
-   if (!ds->drv->port_vlan_del)
-   return -EOPNOTSUPP;
-
-   return ds->drv->port_vlan_del(ds, p->dp, vlan);
-}
-
-static int dsa_slave_port_vlan_dump(struct net_device *dev,
-   struct switchdev_obj_port_vlan *vlan,
-   switchdev_obj_dump_cb_t *cb)
-{
-   struct dsa_slave_priv *p = netdev_priv(dev);
-   struct dsa_switch *ds = p->dp->ds;
-
-   if (ds->drv->port_vlan_dump)
-   return ds->drv->port_vlan_dump(ds, p->dp, vlan, cb);
-
-   return -EOPNOTSUPP;
-}
-
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -323,9 +279,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
SWITCHDEV_OBJ_PORT_FDB(obj), trans);

[RFC 13/20] net: dsa: list switches in tree

2016-04-27 Thread Vivien Didelot

List the registered dsa_switch structures in a "ds" member of the
dsa_switch_tree structure. This allows the drivers to easily iterate on
the DSA switch structures of their related DSA tree.

Signed-off-by: Vivien Didelot 
---
 include/net/dsa.h | 9 +
 net/dsa/dsa.c | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 389227d..85fac8a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -32,11 +32,16 @@ enum dsa_tag_protocol {
 #define DSA_MAX_SWITCHES   4
 #define DSA_MAX_PORTS  12
 
+
+#define dsa_tree_for_each_switch(_dst, _ds)\
+   list_for_each_entry(_ds, &_dst->ds, list)
+
 #define dsa_switch_for_each_port(_ds, _dp, _num_ports) \
for (_dp = list_first_entry(&_ds->dp, typeof(*_dp), list);  \
 &_dp->list != (&_ds->dp) && _dp->port < _num_ports;\
 _dp = list_next_entry(_dp, list))
 
+
 struct dsa_chip_data {
/*
 * How to access the switch configuration registers.
@@ -125,6 +130,8 @@ struct dsa_switch_tree {
 * Data for the individual switch chips.
 */
struct dsa_switch   *switches[DSA_MAX_SWITCHES];
+
+   struct list_headds;
 };
 
 struct dsa_port {
@@ -137,6 +144,8 @@ struct dsa_port {
 };
 
 struct dsa_switch {
+   struct list_headlist;
+
/*
 * Parent switch tree, and switch index.
 */
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index aa4a61a..b0055c7 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -842,6 +842,8 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
struct net_device *dev,
int i;
unsigned configured = 0;
 
+   INIT_LIST_HEAD(&dst->ds);
+
dst->pd = pd;
dst->master_netdev = dev;
dst->cpu_switch = -1;
@@ -858,6 +860,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, 
struct net_device *dev,
}
 
dst->switches[i] = ds;
+   list_add_tail(&ds->list, &dst->ds);
 
++configured;
}
-- 
2.8.0

[RFC 04/20] net: dsa: pass dsa_port down to drivers FDB ops

2016-04-27 Thread Vivien Didelot

Now that DSA has a proper structure for DSA ports, pass it down to the
port_fdb_{prepare,add,del,dump} driver functions.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   | 20 +++-
 drivers/net/dsa/mv88e6xxx.c | 22 +++---
 drivers/net/dsa/mv88e6xxx.h |  8 
 include/net/dsa.h   |  8 
 net/dsa/slave.c |  8 
 5 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 2d7b297..f7b53fa 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -725,7 +725,7 @@ static int bcm_sf2_arl_op(struct bcm_sf2_priv *priv, int 
op, int port,
return bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid);
 }
 
-static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp,
  const struct switchdev_obj_port_fdb *fdb,
  struct switchdev_trans *trans)
 {
@@ -733,22 +733,22 @@ static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, 
int port,
return 0;
 }
 
-static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port,
+static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, struct dsa_port *dp,
   const struct switchdev_obj_port_fdb *fdb,
   struct switchdev_trans *trans)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
-   if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
+   if (bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, true))
pr_err("%s: failed to add MAC address\n", __func__);
 }
 
-static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, struct dsa_port *dp,
  const struct switchdev_obj_port_fdb *fdb)
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
-   return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
+   return bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, false);
 }
 
 static int bcm_sf2_arl_search_wait(struct bcm_sf2_priv *priv)
@@ -799,16 +799,18 @@ static int bcm_sf2_sw_fdb_copy(struct net_device *dev, 
int port,
return cb(&fdb->obj);
 }
 
-static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port,
+static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, struct dsa_port *dp,
   struct switchdev_obj_port_fdb *fdb,
   int (*cb)(struct switchdev_obj *obj))
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
-   struct net_device *dev = ds->ports[port];
+   struct net_device *dev;
struct bcm_sf2_arl_entry results[2];
unsigned int count = 0;
int ret;
 
+   dev = ds->ports[dp->port];
+
/* Start search operation */
core_writel(priv, ARLA_SRCH_STDN, CORE_ARLA_SRCH_CTL);
 
@@ -819,12 +821,12 @@ static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int 
port,
 
/* Read both entries, then return their values back */
bcm_sf2_arl_search_rd(priv, 0, &results[0]);
-   ret = bcm_sf2_sw_fdb_copy(dev, port, &results[0], fdb, cb);
+   ret = bcm_sf2_sw_fdb_copy(dev, dp->port, &results[0], fdb, cb);
if (ret)
return ret;
 
bcm_sf2_arl_search_rd(priv, 1, &results[1]);
-   ret = bcm_sf2_sw_fdb_copy(dev, port, &results[1], fdb, cb);
+   ret = bcm_sf2_sw_fdb_copy(dev, dp->port, &results[1], fdb, cb);
if (ret)
return ret;
 
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 3f78c73..c1ff763 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2031,7 +2031,7 @@ static int _mv88e6xxx_port_fdb_load(struct dsa_switch 
*ds, int port,
return _mv88e6xxx_atu_load(ds, &entry);
 }
 
-int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port,
+int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, struct dsa_port *dp,
   const struct switchdev_obj_port_fdb *fdb,
   struct switchdev_trans *trans)
 {
@@ -2041,7 +2041,7 @@ int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int 
port,
return 0;
 }
 
-void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, struct dsa_port *dp,
const struct switchdev_obj_port_fdb *fdb,
struct switchdev_trans *trans)
 {
@@ -2051,19 +2051,19 @@ void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int 
port,
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 
mutex_lock(&ps->smi_mutex);
-   if (_mv88e6xxx_port_fdb_load(ds, port, fdb->addr, fdb->vid, state))
-   netdev_err(ds->ports[port], "failed to load

[RFC 15/20] net: dsa: add tree-wide FDB ops

2016-04-27 Thread Vivien Didelot

In order to support cross-chip operations, we need to inform each switch
driver when a port operation occurs in a DSA tree.

Implement tree-wide FDB operations.

Signed-off-by: Vivien Didelot 
---
 drivers/net/dsa/bcm_sf2.c   | 12 
 drivers/net/dsa/mv88e6xxx.c | 12 
 net/dsa/dsa_priv.h  |  9 ++
 net/dsa/slave.c | 68 ++---
 net/dsa/tree.c  | 61 
 5 files changed, 109 insertions(+), 53 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 0a91ea9..6e634e5 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -733,6 +733,9 @@ static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
  const struct switchdev_obj_port_fdb *fdb,
  struct switchdev_trans *trans)
 {
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
/* We do not need to do anything specific here yet */
return 0;
 }
@@ -743,6 +746,9 @@ static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, 
struct dsa_port *dp,
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
if (bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, true))
pr_err("%s: failed to add MAC address\n", __func__);
 }
@@ -752,6 +758,9 @@ static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, struct 
dsa_port *dp,
 {
struct bcm_sf2_priv *priv = ds_to_priv(ds);
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
return bcm_sf2_arl_op(priv, 0, dp->port, fdb->addr, fdb->vid, false);
 }
 
@@ -813,6 +822,9 @@ static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, 
struct dsa_port *dp,
unsigned int count = 0;
int ret;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
dev = ds->ports[dp->port];
 
/* Start search operation */
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index 6fef29b..7d29de3 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2037,6 +2037,9 @@ int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, 
struct dsa_port *dp,
   const struct switchdev_obj_port_fdb *fdb,
   struct switchdev_trans *trans)
 {
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
/* We don't need any dynamic resource from the kernel (yet),
 * so skip the prepare phase.
 */
@@ -2052,6 +2055,9 @@ void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, struct 
dsa_port *dp,
GLOBAL_ATU_DATA_STATE_UC_STATIC;
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
 
+   if (dsa_port_is_external(dp, ds))
+   return;
+
mutex_lock(&ps->smi_mutex);
if (_mv88e6xxx_port_fdb_load(ds, dp->port, fdb->addr, fdb->vid, state))
netdev_err(ds->ports[dp->port], "failed to load MAC address\n");
@@ -2064,6 +2070,9 @@ int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, struct 
dsa_port *dp,
struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
int ret;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(&ps->smi_mutex);
ret = _mv88e6xxx_port_fdb_load(ds, dp->port, fdb->addr, fdb->vid,
   GLOBAL_ATU_DATA_STATE_UNUSED);
@@ -2169,6 +2178,9 @@ int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, struct 
dsa_port *dp,
u16 fid;
int err;
 
+   if (dsa_port_is_external(dp, ds))
+   return -EOPNOTSUPP;
+
mutex_lock(&ps->smi_mutex);
 
/* Dump port's default Filtering Information Database (VLAN ID 0) */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 6e08b3d..e8765c3 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -51,6 +52,14 @@ int dsa_tree_bridge_port_join(struct dsa_switch_tree *dst, 
struct dsa_port *dp,
  struct net_device *br);
 void dsa_tree_bridge_port_leave(struct dsa_switch_tree *dst,
struct dsa_port *dp, struct net_device *br);
+int dsa_tree_port_fdb_add(struct dsa_switch_tree *dst, struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb,
+ struct switchdev_trans *trans);
+int dsa_tree_port_fdb_del(struct dsa_switch_tree *dst, struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb);
+int dsa_tree_port_fdb_dump(struct dsa_switch_tree *dst, struct dsa_port *dp,
+  struct switchdev_obj_port_fdb *fdb,
+

1 2 3 >

1 - 100 of 242 matches

Mail list logo