date:20180419

[PATCH net] bnxt_en: Fix memory fault in bnxt_ethtool_init()

2018-04-19 Thread Michael Chan

From: Vasundhara Volam 

In some firmware images, the length of BNX_DIR_TYPE_PKG_LOG nvram type
could be greater than the fixed buffer length of 4096 bytes allocated by
the driver.  This was causing HWRM_NVM_READ to copy more data to the buffer
than the allocated size, causing general protection fault.

Fix the issue by allocating the exact buffer length returned by
HWRM_NVM_FIND_DIR_ENTRY, instead of 4096.  Move the kzalloc() call
into the bnxt_get_pkgver() function.

Fixes: 3ebf6f0a09a2 ("bnxt_en: Add installed-package firmware version reporting 
via Ethtool GDRVINFO")
Signed-off-by: Vasundhara Volam 
Signed-off-by: Michael Chan 
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c  | 49 --
 drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h |  2 -
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 1f622ca..8ba14ae 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1927,22 +1927,39 @@ static char *bnxt_parse_pkglog(int desired_field, u8 
*data, size_t datalen)
return retval;
 }
 
-static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen)
+static void bnxt_get_pkgver(struct net_device *dev)
 {
+   struct bnxt *bp = netdev_priv(dev);
u16 index = 0;
-   u32 datalen;
+   char *pkgver;
+   u32 pkglen;
+   u8 *pkgbuf;
+   int len;
 
if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_PKG_LOG,
 BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE,
-&index, NULL, &datalen) != 0)
-   return NULL;
+&index, NULL, &pkglen) != 0)
+   return;
 
-   memset(buf, 0, buflen);
-   if (bnxt_get_nvram_item(dev, index, 0, datalen, buf) != 0)
-   return NULL;
+   pkgbuf = kzalloc(pkglen, GFP_KERNEL);
+   if (!pkgbuf) {
+   dev_err(&bp->pdev->dev, "Unable to allocate memory for pkg 
version, length = %u\n",
+   pkglen);
+   return;
+   }
+
+   if (bnxt_get_nvram_item(dev, index, 0, pkglen, pkgbuf))
+   goto err;
 
-   return bnxt_parse_pkglog(BNX_PKG_LOG_FIELD_IDX_PKG_VERSION, buf,
-   datalen);
+   pkgver = bnxt_parse_pkglog(BNX_PKG_LOG_FIELD_IDX_PKG_VERSION, pkgbuf,
+  pkglen);
+   if (pkgver && *pkgver != 0 && isdigit(*pkgver)) {
+   len = strlen(bp->fw_ver_str);
+   snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1,
+"/pkg %s", pkgver);
+   }
+err:
+   kfree(pkgbuf);
 }
 
 static int bnxt_get_eeprom(struct net_device *dev,
@@ -2615,22 +2632,10 @@ void bnxt_ethtool_init(struct bnxt *bp)
struct hwrm_selftest_qlist_input req = {0};
struct bnxt_test_info *test_info;
struct net_device *dev = bp->dev;
-   char *pkglog;
int i, rc;
 
-   pkglog = kzalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL);
-   if (pkglog) {
-   char *pkgver;
-   int len;
+   bnxt_get_pkgver(dev);
 
-   pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH);
-   if (pkgver && *pkgver != 0 && isdigit(*pkgver)) {
-   len = strlen(bp->fw_ver_str);
-   snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1,
-"/pkg %s", pkgver);
-   }
-   kfree(pkglog);
-   }
if (bp->hwrm_spec_code < 0x10704 || !BNXT_SINGLE_PF(bp))
return;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
index 73f2249..8344481 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h
@@ -59,8 +59,6 @@ enum bnxt_nvm_directory_type {
 #define BNX_DIR_ATTR_NO_CHKSUM (1 << 0)
 #define BNX_DIR_ATTR_PROP_STREAM   (1 << 1)
 
-#define BNX_PKG_LOG_MAX_LENGTH 4096
-
 enum bnxnvm_pkglog_field_index {
BNX_PKG_LOG_FIELD_IDX_INSTALLED_TIMESTAMP   = 0,
BNX_PKG_LOG_FIELD_IDX_PKG_DESCRIPTION   = 1,
-- 
1.8.3.1

Re: [RFC PATCH net-next v6 2/4] net: Introduce generic bypass module

2018-04-19 Thread Jiri Pirko

Thu, Apr 19, 2018 at 06:08:58AM CEST, m...@redhat.com wrote:
>On Wed, Apr 18, 2018 at 10:32:06PM +0200, Jiri Pirko wrote:
>> >> >> > With regards to alternate names for 'active', you suggested 
>> >> >> > 'stolen', but i
>> >> >> > am not too happy with it.
>> >> >> > netvsc uses vf_netdev, are you OK with this? Or another option is 
>> >> >> > 'passthru'
>> >> >> No. The netdev could be any netdevice. It does not have to be a "VF".
>> >> >> I think "stolen" is quite appropriate since it describes the modus
>> >> >> operandi. The bypass master steals some netdevice according to some
>> >> >> match.
>> >> >> 
>> >> >> But I don't insist on "stolen". Just sounds right.
>> >> >
>> >> >We are adding VIRTIO_NET_F_BACKUP as a new feature bit to enable this 
>> >> >feature, So i think
>> >> >'backup' name is consistent.
>> >> 
>> >> It perhaps makes sense from the view of virtio device. However, as I
>> >> described couple of times, for master/slave device the name "backup" is
>> >> highly misleading.
>> >
>> >virtio is the backup. You are supposed to use another
>> >(typically passthrough) device, if that fails use virtio.
>> >It does seem appropriate to me. If you like, we can
>> >change that to "standby".  Active I don't like either. "main"?
>> 
>> Sounds much better, yes.
>
>Excuse me, which of the versions are better in your eyes?

standby is okay. main/primary is fine too.

>
>
>> 
>> >
>> >In fact would failover be better than bypass?
>> 
>> Also, much better.
>>

[PATCH net-next v2 3/3] net: ethernet: ave: add support for phy-mode setting of system controller

2018-04-19 Thread Kunihiko Hayashi

This patch adds support for specifying system controller that configures
phy-mode setting.

According to the DT property "phy-mode", it's necessary to configure the
controller, which is used to choose the settings of the MAC suitable,
for example, mdio pin connections, internal clocks, and so on.

Supported phy-modes are SoC-dependent. The driver allows phy-mode to set
"internal" if the SoC has a built-in PHY, and {"mii", "rmii", "rgmii"}
if the SoC supports each mode. So we have to check whether the phy-mode
is valid or not.

This adds the following features for each SoC:
- check whether the SoC supports the specified phy-mode
- configure the controller according to phy-mode

The DT property accepts one argument to distinguish them for multiple MAC
instances.

ethernet@6500 {
...
socionext,syscon-phy-mode = <&soc_glue 0>;
};

ethernet@6520 {
...
socionext,syscon-phy-mode = <&soc_glue 1>;
};

Signed-off-by: Kunihiko Hayashi 
Signed-off-by: Masahiro Yamada 
---
 drivers/net/ethernet/socionext/Kconfig   |   2 +
 drivers/net/ethernet/socionext/sni_ave.c | 150 ---
 2 files changed, 140 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/socionext/Kconfig 
b/drivers/net/ethernet/socionext/Kconfig
index 6bcfe27..b80048c 100644
--- a/drivers/net/ethernet/socionext/Kconfig
+++ b/drivers/net/ethernet/socionext/Kconfig
@@ -14,6 +14,8 @@ if NET_VENDOR_SOCIONEXT
 config SNI_AVE
tristate "Socionext AVE ethernet support"
depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF
+   depends on HAS_IOMEM
+   select MFD_SYSCON
select PHYLIB
---help---
  Driver for gigabit ethernet MACs, called AVE, in the
diff --git a/drivers/net/ethernet/socionext/sni_ave.c 
b/drivers/net/ethernet/socionext/sni_ave.c
index 52940bd..f7eccee 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -18,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -197,6 +199,11 @@
 #define AVE_INTM_COUNT 20
 #define AVE_FORCE_TXINTCNT 1
 
+/* SG */
+#define SG_ETPINMODE   0x540
+#define SG_ETPINMODE_EXTPHYBIT(1)  /* for LD11 */
+#define SG_ETPINMODE_RMII(ins) BIT(ins)
+
 #define IS_DESC_64BIT(p)   ((p)->data->is_desc_64bit)
 
 #define AVE_MAX_CLKS   4
@@ -228,12 +235,6 @@ struct ave_desc_info {
struct ave_desc *desc;  /* skb info related descriptor */
 };
 
-struct ave_soc_data {
-   boolis_desc_64bit;
-   const char  *clock_names[AVE_MAX_CLKS];
-   const char  *reset_names[AVE_MAX_RSTS];
-};
-
 struct ave_stats {
struct  u64_stats_sync  syncp;
u64 packets;
@@ -257,6 +258,9 @@ struct ave_private {
phy_interface_t phy_mode;
struct phy_device   *phydev;
struct mii_bus  *mdio;
+   struct regmap   *regmap;
+   unsigned intpinmode_mask;
+   unsigned intpinmode_val;
 
/* stats */
struct ave_statsstats_rx;
@@ -279,6 +283,14 @@ struct ave_private {
const struct ave_soc_data *data;
 };
 
+struct ave_soc_data {
+   boolis_desc_64bit;
+   const char  *clock_names[AVE_MAX_CLKS];
+   const char  *reset_names[AVE_MAX_RSTS];
+   int (*get_pinmode)(struct ave_private *priv,
+  phy_interface_t phy_mode, u32 arg);
+};
+
 static u32 ave_desc_read(struct net_device *ndev, enum desc_id id, int entry,
 int offset)
 {
@@ -1179,6 +1191,11 @@ static int ave_init(struct net_device *ndev)
}
}
 
+   ret = regmap_update_bits(priv->regmap, SG_ETPINMODE,
+priv->pinmode_mask, priv->pinmode_val);
+   if (ret)
+   return ret;
+
ave_global_reset(ndev);
 
mdio_np = of_get_child_by_name(np, "mdio");
@@ -1537,6 +1554,7 @@ static int ave_probe(struct platform_device *pdev)
const struct ave_soc_data *data;
struct device *dev = &pdev->dev;
char buf[ETHTOOL_FWVERS_LEN];
+   struct of_phandle_args args;
phy_interface_t phy_mode;
struct ave_private *priv;
struct net_device *ndev;
@@ -1559,12 +1577,6 @@ static int ave_probe(struct platform_device *pdev)
dev_err(dev, "phy-mode not found\n");
return -EINVAL;
}
-   if ((!phy_interface_mode_is_rgmii(phy_mode)) &&
-   phy_mode != PHY_INTERFACE_MODE_RMII &&
-   phy_mode != PHY_INTERFACE_MODE_MII) {
-   dev_err(dev, "phy-mode is invalid\n");
-   return -EINVAL;
-   }
 
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
@@ -1656,6 +1668,26 @@ static int ave_probe(struct platform_device *pdev)
priv->nrsts++;

[PATCH net-next v2 0/3] ave: fix the activation issues for some UniPhier SoCs

2018-04-19 Thread Kunihiko Hayashi

This series adds the following changes to fix the activation issues and satisfy
the requirements of the AVE ethernet driver implemented on some UniPhier SoCs.

- Add support for additional necessary clocks and resets, because the kernel
  is stalled on Pro4 due to lack of them.

- Check whether the SoC supports the specified phy-mode

- Add DT property support indicating the system controller that has the feature
  for configuring phy-mode, including the built-in phy on LD11.

v1: https://www.spinics.net/lists/netdev/msg494904.html

Changes since v1:
- Add 'Reviewed-by' lines

Kunihiko Hayashi (3):
  net: ethernet: ave: add multiple clocks and resets support as required
property
  dt-bindings: net: ave: add syscon-phy-mode property to configure
phy-mode setting
  net: ethernet: ave: add support for phy-mode setting of system
controller

 .../bindings/net/socionext,uniphier-ave4.txt   |  19 +-
 drivers/net/ethernet/socionext/Kconfig |   2 +
 drivers/net/ethernet/socionext/sni_ave.c   | 252 ++---
 3 files changed, 238 insertions(+), 35 deletions(-)

-- 
2.7.4

[PATCH net-next v2 1/3] net: ethernet: ave: add multiple clocks and resets support as required property

2018-04-19 Thread Kunihiko Hayashi

When the link comes up on the Pro4 SoC, the kernel stalls
due to some missing clocks and resets.

The AVE block for Pro4 is connected to the GIO bus in the SoC.
Without its clock/reset, the access to the AVE register makes the
system stall.

In the same way, another MAC clock for Giga-bit Connection and
the PHY clock are also required for Pro4 to activate the Giga-bit feature
and to recognize the PHY.

To satisfy these requirements, this patch adds support for multiple clocks
and resets, and adds the clock-names and reset-names to the binding because
we need to distinguish clock/reset for the AVE main block and the others.

Also, make the resets a required property. Currently, "reset is
optional" relies on the bootloader or firmware having deasserted
the reset before booting the kernel.  Drivers should work without
such an expectation.

Fixes: 4c270b55a5af ("net: ethernet: socionext: add AVE ethernet driver")
Suggested-by: Masahiro Yamada 
Signed-off-by: Kunihiko Hayashi 
Reviewed-by: Rob Herring 
---
 .../bindings/net/socionext,uniphier-ave4.txt   |  13 ++-
 drivers/net/ethernet/socionext/sni_ave.c   | 108 -
 2 files changed, 96 insertions(+), 25 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt 
b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
index 96398cc..85e0c49 100644
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
@@ -17,9 +17,18 @@ Required properties:
  - phy-handle: Should point to the external phy device.
See ethernet.txt file in the same directory.
  - clocks: A phandle to the clock for the MAC.
+   For Pro4 SoC, that is "socionext,uniphier-pro4-ave4",
+   another MAC clock, GIO bus clock and PHY clock are also required.
+ - clock-names: Should contain
+   - "ether", "ether-gb", "gio", "ether-phy" for Pro4 SoC
+   - "ether" for others
+ - resets: A phandle to the reset control for the MAC. For Pro4 SoC,
+   GIO bus reset is also required.
+ - reset-names: Should contain
+   - "ether", "gio" for Pro4 SoC
+   - "ether" for others
 
 Optional properties:
- - resets: A phandle to the reset control for the MAC.
  - local-mac-address: See ethernet.txt in the same directory.
 
 Required subnode:
@@ -34,7 +43,9 @@ Example:
interrupts = <0 66 4>;
phy-mode = "rgmii";
phy-handle = <&ethphy>;
+   clock-names = "ether";
clocks = <&sys_clk 6>;
+   reset-names = "ether";
resets = <&sys_rst 6>;
local-mac-address = [00 00 00 00 00 00];
 
diff --git a/drivers/net/ethernet/socionext/sni_ave.c 
b/drivers/net/ethernet/socionext/sni_ave.c
index 0b3b7a4..52940bd 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -199,6 +199,9 @@
 
 #define IS_DESC_64BIT(p)   ((p)->data->is_desc_64bit)
 
+#define AVE_MAX_CLKS   4
+#define AVE_MAX_RSTS   2
+
 enum desc_id {
AVE_DESCID_RX,
AVE_DESCID_TX,
@@ -227,6 +230,8 @@ struct ave_desc_info {
 
 struct ave_soc_data {
boolis_desc_64bit;
+   const char  *clock_names[AVE_MAX_CLKS];
+   const char  *reset_names[AVE_MAX_RSTS];
 };
 
 struct ave_stats {
@@ -245,8 +250,10 @@ struct ave_private {
int phy_id;
unsigned intdesc_size;
u32 msg_enable;
-   struct clk  *clk;
-   struct reset_control*rst;
+   int nclks;
+   struct clk  *clk[AVE_MAX_CLKS];
+   int nrsts;
+   struct reset_control*rst[AVE_MAX_RSTS];
phy_interface_t phy_mode;
struct phy_device   *phydev;
struct mii_bus  *mdio;
@@ -1153,18 +1160,23 @@ static int ave_init(struct net_device *ndev)
struct device_node *np = dev->of_node;
struct device_node *mdio_np;
struct phy_device *phydev;
-   int ret;
+   int nc, nr, ret;
 
/* enable clk because of hw access until ndo_open */
-   ret = clk_prepare_enable(priv->clk);
-   if (ret) {
-   dev_err(dev, "can't enable clock\n");
-   return ret;
+   for (nc = 0; nc < priv->nclks; nc++) {
+   ret = clk_prepare_enable(priv->clk[nc]);
+   if (ret) {
+   dev_err(dev, "can't enable clock\n");
+   goto out_clk_disable;
+   }
}
-   ret = reset_control_deassert(priv->rst);
-   if (ret) {
-   dev_err(dev, "can't deassert reset\n");
-   goto out_clk_disable;
+
+   for (nr = 0; nr < priv->nrsts; nr++) {
+   ret = reset_control_deassert(priv->rst[nr]);
+   if (ret) {
+   dev_err(dev, "can't deassert r

[PATCH net-next v2 2/3] dt-bindings: net: ave: add syscon-phy-mode property to configure phy-mode setting

2018-04-19 Thread Kunihiko Hayashi

Add "socionext,syscon-phy-mode" property to specify system controller that
configures the settings about phy-mode.

Signed-off-by: Kunihiko Hayashi 
Reviewed-by: Rob Herring 
---
 Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt 
b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
index 85e0c49..fc8f017 100644
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
@@ -13,7 +13,8 @@ Required properties:
  - reg: Address where registers are mapped and size of region.
  - interrupts: Should contain the MAC interrupt.
  - phy-mode: See ethernet.txt in the same directory. Allow to choose
-   "rgmii", "rmii", or "mii" according to the PHY.
+   "rgmii", "rmii", "mii", or "internal" according to the PHY.
+   The acceptable mode is SoC-dependent.
  - phy-handle: Should point to the external phy device.
See ethernet.txt file in the same directory.
  - clocks: A phandle to the clock for the MAC.
@@ -27,6 +28,8 @@ Required properties:
  - reset-names: Should contain
- "ether", "gio" for Pro4 SoC
- "ether" for others
+ - socionext,syscon-phy-mode: A phandle to syscon with one argument
+   that configures phy mode. The argument is the ID of MAC instance.
 
 Optional properties:
  - local-mac-address: See ethernet.txt in the same directory.
@@ -47,6 +50,7 @@ Example:
clocks = <&sys_clk 6>;
reset-names = "ether";
resets = <&sys_rst 6>;
+   socionext,syscon-phy-mode = <&soc_glue 0>;
local-mac-address = [00 00 00 00 00 00];
 
mdio {
-- 
2.7.4

Greetings

2018-04-19 Thread Miss Zeliha ömer faruk




Hello

Greetings to you please i have a business proposal for you contact me
for more detailes asap thanks.

Best Regards,
Miss.Zeliha ömer faruk
Esentepe Mahallesi Büyükdere
Caddesi Kristal Kule Binasi
No:215
Sisli - Istanbul, Turkey

Re: [PATCH net-next 2/2] udp: implement and use per cpu rx skbs cache

2018-04-19 Thread Paolo Abeni

Hi,

On Wed, 2018-04-18 at 12:21 -0700, Eric Dumazet wrote:
> 
> On 04/18/2018 10:15 AM, Paolo Abeni wrote:
> is not appealing to me :/
> > 
> > Thank you for the feedback.
> > Sorry for not being clear about it, but knotd is using SO_REUSEPORT and
> > the above tests are leveraging it.
> > 
> > That 5% is on top of that 300%.
> 
> Then there is something wrong.
> 
> Adding copies should not increase performance.

The skb and data are copied into the UDP skb cache only if the socket
is under memory pressure, and that happens if and only if the receiver
is slower than the BH/IP receive path.

The copy slows down the RX path - which was dropping packets - and
makes the udp_recvmsg() considerably faster, as consuming skb becomes
almost a no-op.

AFAICS, this is similar to the strategy you used in:

commit c8c8b127091b758f5768f906bcdeeb88bc9951ca
Author: Eric Dumazet 
Date:   Wed Dec 7 09:19:33 2016 -0800

udp: under rx pressure, try to condense skbs

with the difference that with the UDP skb cache there is a hard limit
to the amount of memory the BH is allowed to copy.

> If it does, there is certainly another way, reaching 10% instead of 5%

I benchmarked vs a DNS server to test and verify that we get measurable
benefits in real life scenario. The measured performance gain for the
RX path with reasonable configurations is ~20%.

Any suggestions for better results are more than welcome!

Cheers,

Paolo

Re: [PATCH net] virtio_net: split out ctrl buffer

2018-04-19 Thread kbuild test robot

Hi Michael,

I love your patch! Yet something to improve:

[auto build test ERROR on net/master]

url:
https://github.com/0day-ci/linux/commits/Michael-S-Tsirkin/virtio_net-split-out-ctrl-buffer/20180419-145754
config: x86_64-randconfig-x006-201815 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers//net/virtio_net.c: In function 'virtnet_free_queues':
>> drivers//net/virtio_net.c:2365:12: error: 'struct virtnet_info' has no 
>> member named 'err_ctrl'; did you mean 'ctrl'?
 kfree(vi->err_ctrl);
   ^~~~
   ctrl
   drivers//net/virtio_net.c: In function 'virtnet_alloc_queues':
>> drivers//net/virtio_net.c:2589:8: error: 'vq' undeclared (first use in this 
>> function); did you mean 'vi'?
 kfree(vq->ctrl);
   ^~
   vi
   drivers//net/virtio_net.c:2589:8: note: each undeclared identifier is 
reported only once for each function it appears in

vim +2365 drivers//net/virtio_net.c

  2347  
  2348  static void virtnet_free_queues(struct virtnet_info *vi)
  2349  {
  2350  int i;
  2351  
  2352  for (i = 0; i < vi->max_queue_pairs; i++) {
  2353  napi_hash_del(&vi->rq[i].napi);
  2354  netif_napi_del(&vi->rq[i].napi);
  2355  netif_napi_del(&vi->sq[i].napi);
  2356  }
  2357  
  2358  /* We called napi_hash_del() before netif_napi_del(),
  2359   * we need to respect an RCU grace period before freeing vi->rq
  2360   */
  2361  synchronize_net();
  2362  
  2363  kfree(vi->rq);
  2364  kfree(vi->sq);
> 2365  kfree(vi->err_ctrl);
  2366  }
  2367  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: [net PATCH v2] net: sched, fix OOO packets with pfifo_fast

2018-04-19 Thread Paolo Abeni

On Wed, 2018-04-18 at 09:44 -0700, John Fastabend wrote:
> Thanks for bringing this up. I'll think about it for a bit maybe
> there is something we can do here. There is a set of conditions
> that if met we can run without the lock. Possibly ONETXQUEUE and
> aligned cpu_map is sufficient. 

I think you mean "root qdisc is mq and aligned cpu_map": AFAICS we can
have ONETXQUEUE when root qdisc is e.g. pfifo_fast which would not help
here.

> We could detect this case and drop
> the locking. For existing systems and high Gbps NICs I think (feel
> free to correct me) assuming a core per cpu is OK. 

I'm sorry, I'm lost. Do you mean "a tx queue per core" instead ?!? 

I'm unsure we can assume the above. In my experiments, at least in some
scenarios it's preferable to configure a limited number of rx/tx
queues, confine BH processing to the related cores and let user space
processes run on the others, with a many to 1 relationship between the
cores "assigned" to user-space and the cores "assigned" to BH
processing. 

Can't we somewhat try to leverage TCQ_F_CAN_BYPASS even with NOLOCK
qdisc? I *think* we can avoid the qdisc_run() call after
sch_direct_xmit() in the bypass scenario, and that will avoid the
blamed atomic ops above.

Cheers,

Paolo

[PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Jisheng Zhang

From: Jingju Hou 

If WOL event happened once, the LED[2] interrupt pin will not be
cleared unless reading the CSISR register. So clear the WOL event
before enabling it.

Signed-off-by: Jingju Hou 
Signed-off-by: Jisheng Zhang 
---
 drivers/net/phy/marvell.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index c22e8e383247..b6abe1cbc84b 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -115,6 +115,9 @@
 /* WOL Event Interrupt Enable */
 #define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7)
 
+/* Copper Specific Interrupt Status Register */
+#define MII_88E1318S_PHY_CSISR 0x13
+
 /* LED Timer Control Register */
 #define MII_88E1318S_PHY_LED_TCR   0x12
 #define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15)
@@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device *phydev,
if (err < 0)
goto error;
 
+   /* If WOL event happened once, the LED[2] interrupt pin
+* will not be cleared unless reading the CSISR register.
+* So clear the WOL event first before enabling it.
+*/
+   phy_read(phydev, MII_88E1318S_PHY_CSISR);
+
/* Enable the WOL interrupt */
err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0,
   MII_88E1318S_PHY_CSIER_WOL_EIE);
-- 
2.17.0

Re: [PATCH net] virtio_net: split out ctrl buffer

2018-04-19 Thread kbuild test robot

Hi Michael,

I love your patch! Yet something to improve:

[auto build test ERROR on net/master]

url:
https://github.com/0day-ci/linux/commits/Michael-S-Tsirkin/virtio_net-split-out-ctrl-buffer/20180419-145754
config: i386-randconfig-a0-201815 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.4-2) 4.9.4
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/net/virtio_net.c: In function 'virtnet_free_queues':
>> drivers/net/virtio_net.c:2365:10: error: 'struct virtnet_info' has no member 
>> named 'err_ctrl'
 kfree(vi->err_ctrl);
 ^
   drivers/net/virtio_net.c: In function 'virtnet_alloc_queues':
>> drivers/net/virtio_net.c:2589:8: error: 'vq' undeclared (first use in this 
>> function)
 kfree(vq->ctrl);
   ^
   drivers/net/virtio_net.c:2589:8: note: each undeclared identifier is 
reported only once for each function it appears in

vim +2365 drivers/net/virtio_net.c

  2347  
  2348  static void virtnet_free_queues(struct virtnet_info *vi)
  2349  {
  2350  int i;
  2351  
  2352  for (i = 0; i < vi->max_queue_pairs; i++) {
  2353  napi_hash_del(&vi->rq[i].napi);
  2354  netif_napi_del(&vi->rq[i].napi);
  2355  netif_napi_del(&vi->sq[i].napi);
  2356  }
  2357  
  2358  /* We called napi_hash_del() before netif_napi_del(),
  2359   * we need to respect an RCU grace period before freeing vi->rq
  2360   */
  2361  synchronize_net();
  2362  
  2363  kfree(vi->rq);
  2364  kfree(vi->sq);
> 2365  kfree(vi->err_ctrl);
  2366  }
  2367  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

Re: [PATCH net-next v4 1/3] vmcore: add API to collect hardware dump in second kernel

2018-04-19 Thread Greg KH

On Tue, Apr 17, 2018 at 01:14:17PM +0530, Rahul Lakkireddy wrote:
> +config PROC_VMCORE_DEVICE_DUMP
> + bool "Device Hardware/Firmware Log Collection"
> + depends on PROC_VMCORE
> + default y

Only things that require the machine to keep working should be 'default
y', please remove this, it's an option.

> + help
> +   Device drivers can collect the device specific snapshot of
> +   their hardware or firmware before they are initialized in
> +   crash recovery kernel. If you say Y here, the device dumps
> +   will be added as ELF notes to /proc/vmcore

Which exact "device drivers" are you referring to here?

thanks,

greg k-h

[PATCH] net: phy: TLK10X initial driver submission

2018-04-19 Thread Måns Andersson

From: Mans Andersson 

Add support for the TI TLK105 and TLK106 10/100Mbit ethernet phys.

In addition the TLK10X needs to be removed from DP83848 driver as the
power back off support is added here for this device.

Datasheet:
http://www.ti.com/lit/gpn/tlk106
---
 .../devicetree/bindings/net/ti,tlk10x.txt  |  27 +++
 drivers/net/phy/Kconfig|   5 +
 drivers/net/phy/Makefile   |   1 +
 drivers/net/phy/dp83848.c  |   3 -
 drivers/net/phy/tlk10x.c   | 209 +
 5 files changed, 242 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/ti,tlk10x.txt
 create mode 100644 drivers/net/phy/tlk10x.c

diff --git a/Documentation/devicetree/bindings/net/ti,tlk10x.txt 
b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
new file mode 100644
index 000..371d0d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
@@ -0,0 +1,27 @@
+* Texas Instruments - TLK105 / TLK106 ethernet PHYs
+
+Required properties:
+   - reg - The ID number for the phy, usually a small integer
+
+Optional properties:
+   - ti,power-back-off - Power Back Off Level
+   Please refer to data sheet chapter 8.6 and TI Application
+   Note SLLA328
+   0 - Normal Operation
+   1 - Level 1 (up to 140m cable between TLK link partners)
+   2 - Level 2 (up to 100m cable between TLK link partners)
+   3 - Level 3 (up to 80m cable between TLK link partners)
+
+Default child nodes are standard Ethernet PHY device
+nodes as described in Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+
+   ethernet-phy@0 {
+   reg = <0>;
+   ti,power-back-off = <2>;
+   };
+
+Datasheets and documentation can be found at:
+http://www.ti.com/lit/gpn/tlk106
+http://www.ti.com/lit/an/slla328/slla328.pdf
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index bdfbabb..c980240 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -295,6 +295,11 @@ config DP83867_PHY
---help---
  Currently supports the DP83867 PHY.
 
+config TLK10X_PHY
+   tristate "Texas Instruments TLK10x PHY"
+   ---help---
+ Supports the TLK105 and TLK106 PHYs.
+
 config FIXED_PHY
tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
depends on PHYLIB
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 01acbcb..37e4e02 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -79,5 +79,6 @@ obj-$(CONFIG_ROCKCHIP_PHY)+= rockchip.o
 obj-$(CONFIG_SMSC_PHY) += smsc.o
 obj-$(CONFIG_STE10XP)  += ste10Xp.o
 obj-$(CONFIG_TERANETICS_PHY)   += teranetics.o
+obj-$(CONFIG_TLK10X_PHY)   += tlk10x.o
 obj-$(CONFIG_VITESSE_PHY)  += vitesse.o
 obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
index cd09c3a..435f401 100644
--- a/drivers/net/phy/dp83848.c
+++ b/drivers/net/phy/dp83848.c
@@ -19,7 +19,6 @@
 #define TI_DP83848C_PHY_ID 0x20005ca0
 #define TI_DP83620_PHY_ID  0x20005ce0
 #define NS_DP83848C_PHY_ID 0x20005c90
-#define TLK10X_PHY_ID  0x2000a210
 
 /* Registers */
 #define DP83848_MICR   0x11 /* MII Interrupt Control Register 
*/
@@ -78,7 +77,6 @@ static struct mdio_device_id __maybe_unused dp83848_tbl[] = {
{ TI_DP83848C_PHY_ID, 0xfff0 },
{ NS_DP83848C_PHY_ID, 0xfff0 },
{ TI_DP83620_PHY_ID, 0xfff0 },
-   { TLK10X_PHY_ID, 0xfff0 },
{ }
 };
 MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
@@ -105,7 +103,6 @@ static struct phy_driver dp83848_driver[] = {
DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"),
DP83848_PHY_DRIVER(TI_DP83620_PHY_ID, "TI DP83620 10/100 Mbps PHY"),
-   DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"),
 };
 module_phy_driver(dp83848_driver);
 
diff --git a/drivers/net/phy/tlk10x.c b/drivers/net/phy/tlk10x.c
new file mode 100644
index 000..1efc81e
--- /dev/null
+++ b/drivers/net/phy/tlk10x.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Driver for the Texas Instruments TLK105 / TLK106
+ *
+ * Copyright (C) 2018 NIBE Industrier AB - http://www.nibe.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#i

RE: [PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Bhadram Varka

Hi,

> -Original Message-
> From: netdev-ow...@vger.kernel.org  On
> Behalf Of Jisheng Zhang
> Sent: Thursday, April 19, 2018 1:33 PM
> To: Andrew Lunn ; Florian Fainelli ;
> David S. Miller 
> Cc: netdev@vger.kernel.org; linux-ker...@vger.kernel.org; Jingju Hou
> 
> Subject: [PATCH] net: phy: marvell: clear wol event before setting it
> 
> From: Jingju Hou 
> 
> If WOL event happened once, the LED[2] interrupt pin will not be cleared 
> unless
> reading the CSISR register. So clear the WOL event before enabling it.
> 
> Signed-off-by: Jingju Hou 
> Signed-off-by: Jisheng Zhang 
> ---
>  drivers/net/phy/marvell.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index
> c22e8e383247..b6abe1cbc84b 100644
> --- a/drivers/net/phy/marvell.c
> +++ b/drivers/net/phy/marvell.c
> @@ -115,6 +115,9 @@
>  /* WOL Event Interrupt Enable */
>  #define MII_88E1318S_PHY_CSIER_WOL_EIE   BIT(7)
> 
> +/* Copper Specific Interrupt Status Register */
> +#define MII_88E1318S_PHY_CSISR   0x13
> +

There is already macro to represent this register - MII_M1011_IEVENT. Do we 
need this macro ?

>  /* LED Timer Control Register */
>  #define MII_88E1318S_PHY_LED_TCR 0x12
>  #define MII_88E1318S_PHY_LED_TCR_FORCE_INT   BIT(15)
> @@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device
> *phydev,
>   if (err < 0)
>   goto error;
> 
> + /* If WOL event happened once, the LED[2] interrupt pin
> +  * will not be cleared unless reading the CSISR register.
> +  * So clear the WOL event first before enabling it.
> +  */
> + phy_read(phydev, MII_88E1318S_PHY_CSISR);

This part of the operation already taken care by ack_interrupt and did_interrupt
[]
.ack_interrupt = &marvell_ack_interrupt,
.did_interrupt = &m88e1121_did_interrupt,
[...]

If at all WOL event occurred marvell_ack_interrupt will take care of clearing 
the interrupt status register.
Am I missing anything here ?

>   /* Enable the WOL interrupt */
>   err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0,
>  MII_88E1318S_PHY_CSIER_WOL_EIE);
> --
> 2.17.0

Re: [PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Jisheng Zhang

Hi,

On Thu, 19 Apr 2018 08:38:45 + Bhadram Varka wrote:

> Hi,
> 
> > -Original Message-
> > From: netdev-ow...@vger.kernel.org  On
> > Behalf Of Jisheng Zhang
> > Sent: Thursday, April 19, 2018 1:33 PM
> > To: Andrew Lunn ; Florian Fainelli ;
> > David S. Miller 
> > Cc: netdev@vger.kernel.org; linux-ker...@vger.kernel.org; Jingju Hou
> > 
> > Subject: [PATCH] net: phy: marvell: clear wol event before setting it
> > 
> > From: Jingju Hou 
> > 
> > If WOL event happened once, the LED[2] interrupt pin will not be cleared 
> > unless
> > reading the CSISR register. So clear the WOL event before enabling it.
> > 
> > Signed-off-by: Jingju Hou 
> > Signed-off-by: Jisheng Zhang 
> > ---
> >  drivers/net/phy/marvell.c | 9 +
> >  1 file changed, 9 insertions(+)
> > 
> > diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index
> > c22e8e383247..b6abe1cbc84b 100644
> > --- a/drivers/net/phy/marvell.c
> > +++ b/drivers/net/phy/marvell.c
> > @@ -115,6 +115,9 @@
> >  /* WOL Event Interrupt Enable */
> >  #define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7)
> > 
> > +/* Copper Specific Interrupt Status Register */
> > +#define MII_88E1318S_PHY_CSISR 0x13
> > +  
> 
> There is already macro to represent this register - MII_M1011_IEVENT. Do we 
> need this macro ?

Good point. Will use MII_M1011_IEVENT instead in v2.

> 
> >  /* LED Timer Control Register */
> >  #define MII_88E1318S_PHY_LED_TCR   0x12
> >  #define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15)
> > @@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device
> > *phydev,
> > if (err < 0)
> > goto error;
> > 
> > +   /* If WOL event happened once, the LED[2] interrupt pin
> > +* will not be cleared unless reading the CSISR register.
> > +* So clear the WOL event first before enabling it.
> > +*/
> > +   phy_read(phydev, MII_88E1318S_PHY_CSISR);  
> 
> This part of the operation already taken care by ack_interrupt and 
> did_interrupt
> []
> .ack_interrupt = &marvell_ack_interrupt,
> .did_interrupt = &m88e1121_did_interrupt,
> [...]
> 
> If at all WOL event occurred marvell_ack_interrupt will take care of clearing 
> the interrupt status register.
> Am I missing anything here ?

If there's no valid irq for phy, the ack_interrupt/did_interrupt won't
be called.


Thanks

[GIT PULL 0/5] IPVS Updates for v4.18

2018-04-19 Thread Simon Horman

Hi Pablo,

please consider these IPVS enhancements for v4.18.

* Whitespace cleanup

* Add Maglev hashing algorithm as an IPVS scheduler

  Inju Song says "Implements the Google's Maglev hashing algorithm as a
  IPVS scheduler.  Basically it provides consistent hashing but offers some
  special features about disruption and load balancing.

  1) minimal disruption: when the set of destinations changes,
 a connection will likely be sent to the same destination
 as it was before.

  2) load balancing: each destination will receive an almost
 equal number of connections.

 See also: [3.4 Consistent Hashing] in
 https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
 "

* Fix to correct implementation of Knuth's multiplicative hashing
  which is used in sh/dh/lblc/lblcr algorithms. Instead the
  implementation provided by the hash_32() macro is used.

The following changes since commit 159f02977b2feb18a4bece5e586c838a6d26d44b:

  Merge branch 'net-mvneta-improve-suspend-resume' (2018-04-02 11:14:03 -0400)

are available in the git repository at:

  http://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git 
tags/ipvs-for-v4.18

for you to fetch changes up to 9a17740e0ea1c9b1edd89836bb27c76272f54641:

  ipvs: fix multiplicative hashing in sh/dh/lblc/lblcr algorithms (2018-04-09 
10:15:27 +0300)


Arvind Yadav (1):
  netfilter: ipvs: Fix space before '[' error.

Inju Song (3):
  netfilter: ipvs: Keep latest weight of destination
  netfilter: ipvs: Add Maglev hashing scheduler
  netfilter: ipvs: Add configurations of Maglev hashing

Vincent Bernat (1):
  ipvs: fix multiplicative hashing in sh/dh/lblc/lblcr algorithms

 include/net/ip_vs.h  |   1 +
 net/netfilter/ipvs/Kconfig   |  37 +++
 net/netfilter/ipvs/Makefile  |   1 +
 net/netfilter/ipvs/ip_vs_ctl.c   |   4 +
 net/netfilter/ipvs/ip_vs_dh.c|   3 +-
 net/netfilter/ipvs/ip_vs_lblc.c  |   3 +-
 net/netfilter/ipvs/ip_vs_lblcr.c |   3 +-
 net/netfilter/ipvs/ip_vs_mh.c| 540 +++
 net/netfilter/ipvs/ip_vs_proto_tcp.c |   4 +-
 net/netfilter/ipvs/ip_vs_sh.c|   3 +-
 10 files changed, 593 insertions(+), 6 deletions(-)
 create mode 100644 net/netfilter/ipvs/ip_vs_mh.c

[PATCH 2/5] netfilter: ipvs: Keep latest weight of destination

2018-04-19 Thread Simon Horman

From: Inju Song 

The hashing table in scheduler such as source hash or maglev hash
should ignore the changed weight to 0 and allow changing the weight
from/to non-0 values. So, struct ip_vs_dest needs to keep weight
with latest non-0 weight.

Signed-off-by: Inju Song 
Signed-off-by: Julian Anastasov 
Signed-off-by: Simon Horman 
---
 include/net/ip_vs.h| 1 +
 net/netfilter/ipvs/ip_vs_ctl.c | 4 
 2 files changed, 5 insertions(+)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index eb0bec043c96..0ac795b41ab8 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -668,6 +668,7 @@ struct ip_vs_dest {
volatile unsigned int   flags;  /* dest status flags */
atomic_tconn_flags; /* flags to copy to conn */
atomic_tweight; /* server weight */
+   atomic_tlast_weight;/* server latest weight */
 
refcount_t  refcnt; /* reference counter */
struct ip_vs_stats  stats;  /* statistics */
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5ebde4b15810..b91bb70ece92 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -821,6 +821,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct 
ip_vs_dest *dest,
if (add && udest->af != svc->af)
ipvs->mixed_address_family_dests++;
 
+   /* keep the last_weight with latest non-0 weight */
+   if (add || udest->weight != 0)
+   atomic_set(&dest->last_weight, udest->weight);
+
/* set the weight and the flags */
atomic_set(&dest->weight, udest->weight);
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
-- 
2.11.0

[PATCH 1/5] netfilter: ipvs: Fix space before '[' error.

2018-04-19 Thread Simon Horman

From: Arvind Yadav 

Fix checkpatch.pl error:
ERROR: space prohibited before open square bracket '['.

Signed-off-by: Arvind Yadav 
Signed-off-by: Simon Horman 
---
 net/netfilter/ipvs/ip_vs_proto_tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c 
b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index bcd9b7bde4ee..569631d2b2a1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -436,7 +436,7 @@ static bool tcp_state_active(int state)
return tcp_state_active_table[state];
 }
 
-static struct tcp_states_t tcp_states [] = {
+static struct tcp_states_t tcp_states[] = {
 /* INPUT */
 /*sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA*/
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
@@ -459,7 +459,7 @@ static struct tcp_states_t tcp_states [] = {
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
-static struct tcp_states_t tcp_states_dos [] = {
+static struct tcp_states_t tcp_states_dos[] = {
 /* INPUT */
 /*sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA*/
 /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
-- 
2.11.0

[PATCH 4/5] netfilter: ipvs: Add configurations of Maglev hashing

2018-04-19 Thread Simon Horman

From: Inju Song 

To build the maglev hashing scheduler, add some configuration
to Kconfig and Makefile.

 - The compile configurations of MH are added to the Kconfig.

 - The MH build rule is added to the Makefile.

Signed-off-by: Inju Song 
Signed-off-by: Julian Anastasov 
Signed-off-by: Simon Horman 
---
 net/netfilter/ipvs/Kconfig  | 37 +
 net/netfilter/ipvs/Makefile |  1 +
 2 files changed, 38 insertions(+)

diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index b32fb0dbe237..05dc1b77e466 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -225,6 +225,25 @@ config IP_VS_SH
  If you want to compile it in kernel, say Y. To compile it as a
  module, choose M here. If unsure, say N.
 
+config IP_VS_MH
+   tristate "maglev hashing scheduling"
+   ---help---
+ The maglev consistent hashing scheduling algorithm provides the
+ Google's Maglev hashing algorithm as an IPVS scheduler. It assigns
+ network connections to the servers through looking up a statically
+ assigned special hash table called the lookup table. Maglev hashing
+ is to assign a preference list of all the lookup table positions
+ to each destination.
+
+ Through this operation, the maglev hashing gives an almost equal
+ share of the lookup table to each of the destinations and provides
+ minimal disruption by using the lookup table. When the set of
+ destinations changes, a connection will likely be sent to the same
+ destination as it was before.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
 config IP_VS_SED
tristate "shortest expected delay scheduling"
---help---
@@ -266,6 +285,24 @@ config IP_VS_SH_TAB_BITS
  needs to be large enough to effectively fit all the destinations
  multiplied by their respective weights.
 
+comment 'IPVS MH scheduler'
+
+config IP_VS_MH_TAB_INDEX
+   int "IPVS maglev hashing table index of size (the prime numbers)"
+   range 8 17
+   default 12
+   ---help---
+ The maglev hashing scheduler maps source IPs to destinations
+ stored in a hash table. This table is assigned by a preference
+ list of the positions to each destination until all slots in
+ the table are filled. The index determines the prime for size of
+ the table as 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
+ 65521 or 131071. When using weights to allow destinations to
+ receive more connections, the table is assigned an amount
+ proportional to the weights specified. The table needs to be large
+ enough to effectively fit all the destinations multiplied by their
+ respective weights.
+
 comment 'IPVS application helper'
 
 config IP_VS_FTP
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index c552993fa4b9..bfce2677fda2 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
 obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
 obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
 obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
+obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o
 obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
 obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
 
-- 
2.11.0

[PATCH 3/5] netfilter: ipvs: Add Maglev hashing scheduler

2018-04-19 Thread Simon Horman

From: Inju Song 

Implements Google's Maglev hashing algorithm as an IPVS scheduler.

Basically it provides consistent hashing but offers some special
features about disruption and load balancing.

 1) minimal disruption: when the set of destinations changes,
a connection will likely be sent to the same destination
as it was before.

 2) load balancing: each destination will receive an almost
equal number of connections.

See also, for details: [3.4 Consistent Hashing] in
https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf

Signed-off-by: Inju Song 
Signed-off-by: Julian Anastasov 
Signed-off-by: Simon Horman 
---
 net/netfilter/ipvs/ip_vs_mh.c | 540 ++
 1 file changed, 540 insertions(+)
 create mode 100644 net/netfilter/ipvs/ip_vs_mh.c

diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
new file mode 100644
index ..0f795b186eb3
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+/* IPVS:   Maglev Hashing scheduling module
+ *
+ * Authors:Inju Song 
+ *
+ */
+
+/* The mh algorithm is to assign a preference list of all the lookup
+ * table positions to each destination and populate the table with
+ * the most-preferred position of destinations. Then it is to select
+ * destination with the hash key of source IP address through looking
+ * up the lookup table.
+ *
+ * The algorithm is detailed in:
+ * [3.4 Consistent Hashing]
+https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+#include 
+#include 
+
+#define IP_VS_SVC_F_SCHED_MH_FALLBACK  IP_VS_SVC_F_SCHED1 /* MH fallback */
+#define IP_VS_SVC_F_SCHED_MH_PORT  IP_VS_SVC_F_SCHED2 /* MH use port */
+
+struct ip_vs_mh_lookup {
+   struct ip_vs_dest __rcu *dest;  /* real server (cache) */
+};
+
+struct ip_vs_mh_dest_setup {
+   unsigned intoffset; /* starting offset */
+   unsigned intskip;   /* skip */
+   unsigned intperm;   /* next_offset */
+   int turns;  /* weight / gcd() and rshift */
+};
+
+/* Available prime numbers for MH table */
+static int primes[] = {251, 509, 1021, 2039, 4093,
+  8191, 16381, 32749, 65521, 131071};
+
+/* For IPVS MH entry hash table */
+#ifndef CONFIG_IP_VS_MH_TAB_INDEX
+#define CONFIG_IP_VS_MH_TAB_INDEX  12
+#endif
+#define IP_VS_MH_TAB_BITS  (CONFIG_IP_VS_MH_TAB_INDEX / 2)
+#define IP_VS_MH_TAB_INDEX (CONFIG_IP_VS_MH_TAB_INDEX - 8)
+#define IP_VS_MH_TAB_SIZE   primes[IP_VS_MH_TAB_INDEX]
+
+struct ip_vs_mh_state {
+   struct rcu_head rcu_head;
+   struct ip_vs_mh_lookup  *lookup;
+   struct ip_vs_mh_dest_setup  *dest_setup;
+   hsiphash_key_t  hash1, hash2;
+   int gcd;
+   int rshift;
+};
+
+static inline void generate_hash_secret(hsiphash_key_t *hash1,
+   hsiphash_key_t *hash2)
+{
+   hash1->key[0] = 2654435761UL;
+   hash1->key[1] = 2654435761UL;
+
+   hash2->key[0] = 2654446892UL;
+   hash2->key[1] = 2654446892UL;
+}
+
+/* Helper function to determine if server is unavailable */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+   return atomic_read(&dest->weight) <= 0 ||
+  dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
+/* Returns hash value for IPVS MH entry */
+static inline unsigned int
+ip_vs_mh_hashkey(int af, const union nf_inet_addr *addr,
+__be16 port, hsiphash_key_t *key, unsigned int offset)
+{
+   unsigned int v;
+   __be32 addr_fold = addr->ip;
+
+#ifdef CONFIG_IP_VS_IPV6
+   if (af == AF_INET6)
+   addr_fold = addr->ip6[0] ^ addr->ip6[1] ^
+   addr->ip6[2] ^ addr->ip6[3];
+#endif
+   v = (offset + ntohs(port) + ntohl(addr_fold));
+   return hsiphash(&v, sizeof(v), key);
+}
+
+/* Reset all the hash buckets of the specified table. */
+static void ip_vs_mh_reset(struct ip_vs_mh_state *s)
+{
+   int i;
+   struct ip_vs_mh_lookup *l;
+   struct ip_vs_dest *dest;
+
+   l = &s->lookup[0];
+   for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
+   dest = rcu_dereference_protected(l->dest, 1);
+   if (dest) {
+   ip_vs_dest_put(dest);
+   RCU_INIT_POINTER(l->dest, NULL);
+   }
+   l++;
+   }
+}
+
+static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
+ struct ip_vs_service *svc)
+{
+   struct list_head *p;
+   struct ip_vs_mh_dest_setup *ds;
+   struct ip_vs_dest *dest;
+   int lw;
+
+   /* If gcd is smaller then 1, number

[PATCH 5/5] ipvs: fix multiplicative hashing in sh/dh/lblc/lblcr algorithms

2018-04-19 Thread Simon Horman

From: Vincent Bernat 

The sh/dh/lblc/lblcr algorithms are using Knuth's multiplicative
hashing incorrectly. Replace its use by the hash_32() macro, which
correctly implements this algorithm. It doesn't use the same constant,
but it shouldn't matter.

Signed-off-by: Vincent Bernat 
Acked-by: Julian Anastasov 
Signed-off-by: Simon Horman 
---
 net/netfilter/ipvs/ip_vs_dh.c| 3 ++-
 net/netfilter/ipvs/ip_vs_lblc.c  | 3 ++-
 net/netfilter/ipvs/ip_vs_lblcr.c | 3 ++-
 net/netfilter/ipvs/ip_vs_sh.c| 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 75f798f8e83b..07459e71d907 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -81,7 +82,7 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const 
union nf_inet_addr *ad
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
 #endif
-   return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK;
+   return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 3057e453bf31..08147fc6400c 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* for sysctl */
 #include 
@@ -160,7 +161,7 @@ ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
 #endif
-   return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
+   return hash_32(ntohl(addr_fold), IP_VS_LBLC_TAB_BITS);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 92adc04557ed..9b6a6c9e9cfa 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* for sysctl */
 #include 
@@ -323,7 +324,7 @@ ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
 #endif
-   return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
+   return hash_32(ntohl(addr_fold), IP_VS_LBLCR_TAB_BITS);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 16aaac6eedc9..1e01c782583a 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -96,7 +96,8 @@ ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
 #endif
-   return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
+   return (offset + hash_32(ntohs(port) + ntohl(addr_fold),
+IP_VS_SH_TAB_BITS)) &
IP_VS_SH_TAB_MASK;
 }
 
-- 
2.11.0

RE: [PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Bhadram Varka

Hi,

> -Original Message-
> From: Jisheng Zhang 
> Sent: Thursday, April 19, 2018 2:24 PM
> To: Bhadram Varka 
> Cc: Andrew Lunn ; Florian Fainelli ;
> David S. Miller ; netdev@vger.kernel.org; linux-
> ker...@vger.kernel.org; Jingju Hou 
> Subject: Re: [PATCH] net: phy: marvell: clear wol event before setting it
> 
> Hi,
> 
> On Thu, 19 Apr 2018 08:38:45 + Bhadram Varka wrote:
> 
> > Hi,
> >
> > > -Original Message-
> > > From: netdev-ow...@vger.kernel.org  On
> > > Behalf Of Jisheng Zhang
> > > Sent: Thursday, April 19, 2018 1:33 PM
> > > To: Andrew Lunn ; Florian Fainelli
> > > ; David S. Miller 
> > > Cc: netdev@vger.kernel.org; linux-ker...@vger.kernel.org; Jingju Hou
> > > 
> > > Subject: [PATCH] net: phy: marvell: clear wol event before setting
> > > it
> > >
> > > From: Jingju Hou 
> > >
> > > If WOL event happened once, the LED[2] interrupt pin will not be
> > > cleared unless reading the CSISR register. So clear the WOL event before
> enabling it.
> > >
> > > Signed-off-by: Jingju Hou 
> > > Signed-off-by: Jisheng Zhang 
> > > ---
> > >  drivers/net/phy/marvell.c | 9 +
> > >  1 file changed, 9 insertions(+)
> > >
> > > diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
> > > index c22e8e383247..b6abe1cbc84b 100644
> > > --- a/drivers/net/phy/marvell.c
> > > +++ b/drivers/net/phy/marvell.c
> > > @@ -115,6 +115,9 @@
> > >  /* WOL Event Interrupt Enable */
> > >  #define MII_88E1318S_PHY_CSIER_WOL_EIE   BIT(7)
> > >
> > > +/* Copper Specific Interrupt Status Register */
> > > +#define MII_88E1318S_PHY_CSISR   0x13
> > > +
> >
> > There is already macro to represent this register - MII_M1011_IEVENT. Do we
> need this macro ?
> 
> Good point. Will use MII_M1011_IEVENT instead in v2.
> 
> >
> > >  /* LED Timer Control Register */
> > >  #define MII_88E1318S_PHY_LED_TCR 0x12
> > >  #define MII_88E1318S_PHY_LED_TCR_FORCE_INT   BIT(15)
> > > @@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device
> > > *phydev,
> > >   if (err < 0)
> > >   goto error;
> > >
> > > + /* If WOL event happened once, the LED[2] interrupt pin
> > > +  * will not be cleared unless reading the CSISR register.
> > > +  * So clear the WOL event first before enabling it.
> > > +  */
> > > + phy_read(phydev, MII_88E1318S_PHY_CSISR);
> >
> > This part of the operation already taken care by ack_interrupt and
> > did_interrupt [] .ack_interrupt = &marvell_ack_interrupt,
> > .did_interrupt = &m88e1121_did_interrupt, [...]
> >
> > If at all WOL event occurred marvell_ack_interrupt will take care of 
> > clearing the
> interrupt status register.
> > Am I missing anything here ?
> 
> If there's no valid irq for phy, the ack_interrupt/did_interrupt won't be 
> called.

Which means that the PHY does not have an interrupt pin?

Generally the PHY interrupt is what wakes up the system, right? If there is no
interrupt pin, then how will the system wake up from suspend for the magic
packet?

Thanks!

[net-next 1/3] tipc: set default MTU for UDP media

2018-04-19 Thread GhantaKrishnamurthy MohanKrishna

Currently, all bearers are configured with MTU value same as the
underlying L2 device. However, in case of bearers with media type
UDP, higher throughput is possible with a fixed and higher emulated
MTU value than adapting to the underlying L2 MTU.

In this commit, we introduce a parameter mtu in struct tipc_media
and a default value is set for UDP. A default value of 14k
was determined by experimentation and found to have a higher throughput
than 16k. MTU for UDP bearers are assigned the above set value of
media MTU.

Acked-by: Ying Xue 
Acked-by: Jon Maloy 
Signed-off-by: GhantaKrishnamurthy MohanKrishna 

---
 include/uapi/linux/tipc_config.h | 5 +
 net/tipc/udp_media.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 3f29e3c8ed06..4b2c93b1934c 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -185,6 +185,11 @@
 #define TIPC_DEF_LINK_WIN 50
 #define TIPC_MAX_LINK_WIN 8191
 
+/*
+ * Default MTU for UDP media
+ */
+
+#define TIPC_DEF_LINK_UDP_MTU 14000
 
 struct tipc_node_info {
__be32 addr;/* network address of node */
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e7d91f5d5cae..9783101bc4a9 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -713,8 +713,7 @@ static int tipc_udp_enable(struct net *net, struct 
tipc_bearer *b,
err = -EINVAL;
goto err;
}
-   b->mtu = dev->mtu - sizeof(struct iphdr)
-   - sizeof(struct udphdr);
+   b->mtu = b->media->mtu;
 #if IS_ENABLED(CONFIG_IPV6)
} else if (local.proto == htons(ETH_P_IPV6)) {
udp_conf.family = AF_INET6;
@@ -803,6 +802,7 @@ struct tipc_media udp_media_info = {
.priority   = TIPC_DEF_LINK_PRI,
.tolerance  = TIPC_DEF_LINK_TOL,
.window = TIPC_DEF_LINK_WIN,
+   .mtu= TIPC_DEF_LINK_UDP_MTU,
.type_id= TIPC_MEDIA_TYPE_UDP,
.hwaddr_len = 0,
.name   = "udp"
-- 
2.1.4

[net-next 3/3] tipc: configure and apply UDP bearer MTU on running links

2018-04-19 Thread GhantaKrishnamurthy MohanKrishna

Currently, we have option to configure MTU of UDP media. The configured
MTU takes effect on the links going up after that moment. I.e, a user
has to reset bearer to have new value applied across its links. This is
confusing and disturbing on a running cluster.

We now introduce the functionality to change the default UDP bearer MTU
in struct tipc_bearer. Additionally, the links are updated dynamically,
without any need for a reset, when bearer value is changed. We leverage
the existing per-link functionality and the design being symmetrical to
the configuration of link tolerance.

Acked-by: Jon Maloy 
Signed-off-by: GhantaKrishnamurthy MohanKrishna 

---
 net/tipc/bearer.c | 16 +++-
 net/tipc/node.c   | 12 +---
 net/tipc/node.h   |  2 +-
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a22caf9e5a18..2dfb492a7c94 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -697,6 +697,9 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
goto prop_msg_full;
+   if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
+   if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
+   goto prop_msg_full;
 
nla_nest_end(msg->skb, prop);
 
@@ -979,12 +982,23 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct 
genl_info *info)
 
if (props[TIPC_NLA_PROP_TOL]) {
b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
-   tipc_node_apply_tolerance(net, b);
+   tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL);
}
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+   if (props[TIPC_NLA_PROP_MTU]) {
+   if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
+   return -EINVAL;
+#ifdef CONFIG_TIPC_MEDIA_UDP
+   if (tipc_udp_mtu_bad(nla_get_u32
+(props[TIPC_NLA_PROP_MTU])))
+   return -EINVAL;
+   b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+   tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU);
+#endif
+   }
}
 
return 0;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c77dd2f3c589..b71e4e376bb9 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1681,7 +1681,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, 
struct tipc_bearer *b)
kfree_skb(skb);
 }
 
-void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b)
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b,
+ int prop)
 {
struct tipc_net *tn = tipc_net(net);
int bearer_id = b->identity;
@@ -1696,8 +1697,13 @@ void tipc_node_apply_tolerance(struct net *net, struct 
tipc_bearer *b)
list_for_each_entry_rcu(n, &tn->node_list, list) {
tipc_node_write_lock(n);
e = &n->links[bearer_id];
-   if (e->link)
-   tipc_link_set_tolerance(e->link, b->tolerance, &xmitq);
+   if (e->link) {
+   if (prop == TIPC_NLA_PROP_TOL)
+   tipc_link_set_tolerance(e->link, b->tolerance,
+   &xmitq);
+   else if (prop == TIPC_NLA_PROP_MTU)
+   tipc_link_set_mtu(e->link, b->mtu);
+   }
tipc_node_write_unlock(n);
tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr);
}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index f24b83500df1..bb271a37c93f 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -67,7 +67,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, u8 
*peer_id128,
  struct tipc_media_addr *maddr,
  bool *respond, bool *dupl_addr);
 void tipc_node_delete_links(struct net *net, int bearer_id);
-void tipc_node_apply_tolerance(struct net *net, struct tipc_bearer *b);
+void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int 
prop);
 int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
   char *linkname, size_t len);
 int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
-- 
2.1.4

[net-next 2/3] tipc: implement configuration of UDP media MTU

2018-04-19 Thread GhantaKrishnamurthy MohanKrishna

In previous commit, we changed the default emulated MTU for UDP bearers
to 14k.

This commit adds the functionality to set/change the default value
by configuring new MTU for UDP media. UDP bearer(s) have to be disabled
and enabled back for the new MTU to take effect.

Acked-by: Ying Xue 
Acked-by: Jon Maloy 
Signed-off-by: GhantaKrishnamurthy MohanKrishna 

---
 include/uapi/linux/tipc_netlink.h |  1 +
 net/tipc/bearer.c | 13 +
 net/tipc/bearer.h |  3 +++
 net/tipc/udp_media.h  | 14 ++
 4 files changed, 31 insertions(+)

diff --git a/include/uapi/linux/tipc_netlink.h 
b/include/uapi/linux/tipc_netlink.h
index 0affb682e5e3..85c11982c89b 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -266,6 +266,7 @@ enum {
TIPC_NLA_PROP_PRIO, /* u32 */
TIPC_NLA_PROP_TOL,  /* u32 */
TIPC_NLA_PROP_WIN,  /* u32 */
+   TIPC_NLA_PROP_MTU,  /* u32 */
 
__TIPC_NLA_PROP_MAX,
TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index f7d47c89d658..a22caf9e5a18 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1029,6 +1029,9 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
goto prop_msg_full;
+   if (media->type_id == TIPC_MEDIA_TYPE_UDP)
+   if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu))
+   goto prop_msg_full;
 
nla_nest_end(msg->skb, prop);
nla_nest_end(msg->skb, attrs);
@@ -1158,6 +1161,16 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct 
genl_info *info)
m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+   if (props[TIPC_NLA_PROP_MTU]) {
+   if (m->type_id != TIPC_MEDIA_TYPE_UDP)
+   return -EINVAL;
+#ifdef CONFIG_TIPC_MEDIA_UDP
+   if (tipc_udp_mtu_bad(nla_get_u32
+(props[TIPC_NLA_PROP_MTU])))
+   return -EINVAL;
+   m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
+#endif
+   }
}
 
return 0;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6efcee63a381..394290cbbb1d 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -94,6 +94,8 @@ struct tipc_bearer;
  * @priority: default link (and bearer) priority
  * @tolerance: default time (in ms) before declaring link failure
  * @window: default window (in packets) before declaring link congestion
+ * @mtu: max packet size bearer can support for media type not dependent on
+ * underlying device MTU
  * @type_id: TIPC media identifier
  * @hwaddr_len: TIPC media address len
  * @name: media name
@@ -118,6 +120,7 @@ struct tipc_media {
u32 priority;
u32 tolerance;
u32 window;
+   u32 mtu;
u32 type_id;
u32 hwaddr_len;
char name[TIPC_MAX_MEDIA_NAME];
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
index 281bbae87726..e7455cc73e16 100644
--- a/net/tipc/udp_media.h
+++ b/net/tipc/udp_media.h
@@ -38,9 +38,23 @@
 #ifndef _TIPC_UDP_MEDIA_H
 #define _TIPC_UDP_MEDIA_H
 
+#include 
+#include 
+
 int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
 int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer 
*b);
 int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback 
*cb);
 
+/* check if configured MTU is too low for tipc headers */
+static inline bool tipc_udp_mtu_bad(u32 mtu)
+{
+   if (mtu >= (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) +
+   sizeof(struct udphdr)))
+   return false;
+
+   pr_warn("MTU too low for tipc bearer\n");
+   return true;
+}
+
 #endif
 #endif
-- 
2.1.4

[net-next 0/3] tipc: Configuration of MTU for media UDP

2018-04-19 Thread GhantaKrishnamurthy MohanKrishna

Systematic measurements have shown that an emulated MTU of 14k for
UDP bearers is the optimal value for maximal throughput. Accordingly,
the default MTU of UDP bearers is changed to 14k.

We also provide users with a fallback option from this value,
by providing support to configure MTU for UDP bearers. The following
options are introduced which are symmetrical to the design of
configuring link tolerance.

- Configure media with new MTU value, which will take effect on
links going up after the moment it was configured. Alternatively,
the bearer has to be disabled and re-enabled, for existing links to
reflect the configured value.

- Configure bearer with new MTU value, which takes effect on 
running links dynamically.

Please note:
- User has to change MTU at both endpoints, otherwise the link 
will fall back to smallest MTU after a reset.
- Failover from a link with higher MTU to a link with lower MTU

GhantaKrishnamurthy MohanKrishna (3):
  tipc: set default MTU for UDP media
  tipc: implement configuration of UDP media MTU
  tipc: confgiure and apply UDP bearer MTU on running links

 include/uapi/linux/tipc_config.h  |  5 +
 include/uapi/linux/tipc_netlink.h |  1 +
 net/tipc/bearer.c | 29 -
 net/tipc/bearer.h |  3 +++
 net/tipc/node.c   | 12 +---
 net/tipc/node.h   |  2 +-
 net/tipc/udp_media.c  |  4 ++--
 net/tipc/udp_media.h  | 14 ++
 8 files changed, 63 insertions(+), 7 deletions(-)

-- 
2.1.4

Re: [bpf-next PATCH 1/3] bpf: add id to map tracepoint

2018-04-19 Thread Jesper Dangaard Brouer

On Wed, 18 Apr 2018 17:30:48 +0200
Sebastiano Miano  wrote:

> This patch adds the map id to the bpf tracepoints
> that can be used when monitoring or inspecting map
> related functions.
> 
> Signed-off-by: Sebastiano Miano 
> Suggested-by: Jesper Dangaard Brouer 

Acked-by: Jesper Dangaard Brouer 

Thank you for doing this.  I've needed this before when
troubleshooting my XDP programs (specifically xdp_ddos01_blacklist[1]).

E.g. when I want to verify that my tools are doing the right thing, I
can now find the XDP prog id via 'ip link' or bpftool, and list the map
IDs used by the prog tool (via bpftool), and now use perf to record map
changes, which now have the needed IDs I can filter on.  Before, I
could not tell the difference if the program was updating the correct
map (which was a mistake I ran into).

Perf record even supports supplying filters on the cmdline, like:

 perf record -e bpf:bpf_map_* -a --filter 'id == 2 || id == 1' sleep 100

And yes, doing filtering this way is slow, compared to doing it via a
bpf_prog inside the kernel, which Sebastiano already provides a sample
on how to do.  But I just needed a way to find the bug in my program,
not any high speed usage.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

[1] 
https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/samples/bpf/xdp_ddos01_blacklist_cmdline.c

Re: [PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Jisheng Zhang

On Thu, 19 Apr 2018 09:00:40 + Bhadram Varka wrote:

> Hi,
> 
> > -Original Message-
> > From: Jisheng Zhang 
> > Sent: Thursday, April 19, 2018 2:24 PM
> > To: Bhadram Varka 
> > Cc: Andrew Lunn ; Florian Fainelli ;
> > David S. Miller ; netdev@vger.kernel.org; linux-
> > ker...@vger.kernel.org; Jingju Hou 
> > Subject: Re: [PATCH] net: phy: marvell: clear wol event before setting it
> > 
> > Hi,
> > 
> > On Thu, 19 Apr 2018 08:38:45 + Bhadram Varka wrote:
> >   
> > > Hi,
> > >  
> > > > -Original Message-
> > > > From: netdev-ow...@vger.kernel.org  On
> > > > Behalf Of Jisheng Zhang
> > > > Sent: Thursday, April 19, 2018 1:33 PM
> > > > To: Andrew Lunn ; Florian Fainelli
> > > > ; David S. Miller 
> > > > Cc: netdev@vger.kernel.org; linux-ker...@vger.kernel.org; Jingju Hou
> > > > 
> > > > Subject: [PATCH] net: phy: marvell: clear wol event before setting
> > > > it
> > > >
> > > > From: Jingju Hou 
> > > >
> > > > If WOL event happened once, the LED[2] interrupt pin will not be
> > > > cleared unless reading the CSISR register. So clear the WOL event 
> > > > before  
> > enabling it.  
> > > >
> > > > Signed-off-by: Jingju Hou 
> > > > Signed-off-by: Jisheng Zhang 
> > > > ---
> > > >  drivers/net/phy/marvell.c | 9 +
> > > >  1 file changed, 9 insertions(+)
> > > >
> > > > diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
> > > > index c22e8e383247..b6abe1cbc84b 100644
> > > > --- a/drivers/net/phy/marvell.c
> > > > +++ b/drivers/net/phy/marvell.c
> > > > @@ -115,6 +115,9 @@
> > > >  /* WOL Event Interrupt Enable */
> > > >  #define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7)
> > > >
> > > > +/* Copper Specific Interrupt Status Register */
> > > > +#define MII_88E1318S_PHY_CSISR 0x13
> > > > +  
> > >
> > > There is already macro to represent this register - MII_M1011_IEVENT. Do 
> > > we  
> > need this macro ?
> > 
> > Good point. Will use MII_M1011_IEVENT instead in v2.
> >   
> > >  
> > > >  /* LED Timer Control Register */
> > > >  #define MII_88E1318S_PHY_LED_TCR   0x12
> > > >  #define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15)
> > > > @@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device
> > > > *phydev,
> > > > if (err < 0)
> > > > goto error;
> > > >
> > > > +   /* If WOL event happened once, the LED[2] interrupt pin
> > > > +* will not be cleared unless reading the CSISR 
> > > > register.
> > > > +* So clear the WOL event first before enabling it.
> > > > +*/
> > > > +   phy_read(phydev, MII_88E1318S_PHY_CSISR);  
> > >
> > > This part of the operation already taken care by ack_interrupt and
> > > did_interrupt [] .ack_interrupt = &marvell_ack_interrupt,
> > > .did_interrupt = &m88e1121_did_interrupt, [...]
> > >
> > > If at all WOL event occurred marvell_ack_interrupt will take care of 
> > > clearing the  
> > interrupt status register.  
> > > Am I missing anything here ?  
> > 
> > If there's no valid irq for phy, the ack_interrupt/did_interrupt won't be 
> > called.  
> 
> Which means that the PHY is not having Interrupt pin ?

No valid irq doesn't mean "not having interrupt pin". they are different

> 
> Generally through PHY interrupt will wake up the system right. If there is no 
> interrupt pin then how the system will wake up the from suspend for the magic 
> packet.?
> 

IIRC, the phy irq isn't necessary for WOL. The phy interrupt pin isn't
necessarily taken as "interrupt"

PS: Did you use outlook as your email client? it's not suitable
for kernel mail list.

Thanks

Re: [bpf-next PATCH 2/3] bpf: add id to prog tracepoint

2018-04-19 Thread Jesper Dangaard Brouer

On Wed, 18 Apr 2018 17:30:53 +0200
Sebastiano Miano  wrote:

> This patch adds the prog id to the bpf tracepoints
> that can be used when monitoring or inspecting prog
> related functions.
> 
> Signed-off-by: Sebastiano Miano 
> Suggested-by: Jesper Dangaard Brouer 

Acked-by: Jesper Dangaard Brouer 

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Re: [bpf-next PATCH 3/3] bpf: add sample program to trace map events

2018-04-19 Thread Jesper Dangaard Brouer

On Wed, 18 Apr 2018 17:30:59 +0200
Sebastiano Miano  wrote:

> This patch adds a sample program, called trace_map_events,
> that shows how to capture map events and filter them based on
> the map id.
> 
> The program accepts a list of map IDs, via the -i command line
> option, and filters all the map events related to those IDs (i.e.,
> map_create/update/lookup/next_key).
> If no IDs are specified, all map events are listed and no filtering
> is performed.
> 
> Sample usage:
> 
>  # trace_map_events -i  -i  -i  ...
> 
> Signed-off-by: Sebastiano Miano 

Acked-by: Jesper Dangaard Brouer 

I have tested it works:

$ sudo ./trace_map_events -i 2
Init bpf_perf_event for cpu:0
Init bpf_perf_event for cpu:1
Init bpf_perf_event for cpu:2
Init bpf_perf_event for cpu:3
Init bpf_perf_event for cpu:4
Init bpf_perf_event for cpu:5
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...
LOOKUP event for map id: 2 and type: 6
Waiting for map events...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Re: [PATCH net-next] team: account for oper state

2018-04-19 Thread George Wilkie

On Wed, Apr 18, 2018 at 09:17:32PM +0200, Jiri Pirko wrote:
> Wed, Apr 18, 2018 at 05:33:12PM CEST, gwil...@vyatta.att-mail.com wrote:
> >On Wed, Apr 18, 2018 at 04:58:22PM +0200, Jiri Pirko wrote:
> >> Wed, Apr 18, 2018 at 03:35:49PM CEST, gwil...@vyatta.att-mail.com wrote:
> >> >On Wed, Apr 18, 2018 at 02:56:44PM +0200, Jiri Pirko wrote:
> >> >> Wed, Apr 18, 2018 at 12:29:50PM CEST, gwil...@vyatta.att-mail.com wrote:
> >> >> >Account for operational state when determining port linkup state,
> >> >> >as per Documentation/networking/operstates.txt.
> >> >> 
> >> >> Could you please point me to the exact place in the document where this
> >> >> is suggested?
> >> >> 
> >> >
> >> >Various places cover it I think.
> >> >
> >> >In 1. Introduction:
> >> >"interface is not usable just because the admin enabled it"
> >> >"userspace must be granted the possibility to
> >> >influence operational state"
> >> >
> >> >In 4. Setting from userspace:
> >> >"the userspace application can set IFLA_OPERSTATE
> >> >to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set
> >> >netif_carrier_off() or netif_dormant_on()"
> >> >
> >> >We have a use case where we want to set the oper state of the team ports 
> >> >based
> >> >on whether they are actually usable or not (as opposed to just admin up).
> >> 
> >> Are you running a supplicant there or what is the use-case?
> >> 
> >
> >We are using tun/tap interfaces for the team ports with the physical 
> >interfaces
> >under the control of a user process.
> >
> >> How is this handle in other drivers like bond, openvswitch, bridge, etc?
> >
> >It looks like bridge is using it, looking at br_port_carrier_check() and
> >br_add_if().
> 
> Okay, so why do you still need to check netif_carrier_ok?
> Looks like netif_oper_up is enough, right?

Yes, I was being overly cautious. Replacing netif_carrier_ok with netif_oper_up
works OK. I'll send updated patch.

Cheers.


> 
> 
> >
> >Cheers.
> >
> >> 
> >> >
> >> >Cheers.
> >> >
> >> >> 
> >> >> >
> >> >> >Signed-off-by: George Wilkie 
> >> >> >---
> >> >> > drivers/net/team/team.c | 3 ++-
> >> >> > 1 file changed, 2 insertions(+), 1 deletion(-)
> >> >> >
> >> >> >diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
> >> >> >index a6c6ce19..231264a05e55 100644
> >> >> >--- a/drivers/net/team/team.c
> >> >> >+++ b/drivers/net/team/team.c
> >> >> >@@ -2918,7 +2918,8 @@ static int team_device_event(struct 
> >> >> >notifier_block *unused,
> >> >> >   case NETDEV_CHANGE:
> >> >> >   if (netif_running(port->dev))
> >> >> >   team_port_change_check(port,
> >> >> >- 
> >> >> >!!netif_carrier_ok(port->dev));
> >> >> >+ 
> >> >> >!!(netif_carrier_ok(port->dev) &&
> >> >> >+
> >> >> >netif_oper_up(port->dev)));
> >> >> >   break;
> >> >> >   case NETDEV_UNREGISTER:
> >> >> >   team_del_slave(port->team->dev, dev);
> >> >> >-- 
> >> >> >2.11.0
> >> >> >
> 
>

Re: [PATCH bpf-next v3 2/8] bpf: add documentation for eBPF helpers (01-11)

2018-04-19 Thread Daniel Borkmann

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions, all
> written by Alexei:
> 
> - bpf_map_lookup_elem()
> - bpf_map_update_elem()
> - bpf_map_delete_elem()
> - bpf_probe_read()
> - bpf_ktime_get_ns()
> - bpf_trace_printk()
> - bpf_skb_store_bytes()
> - bpf_l3_csum_replace()
> - bpf_l4_csum_replace()
> - bpf_tail_call()
> - bpf_clone_redirect()
> 
> v3:
> - bpf_map_lookup_elem(): Fix description of restrictions for flags
>   related to the existence of the entry.
> - bpf_trace_printk(): State that trace_pipe can be configured. Fix
>   return value in case an unknown format specifier is met. Add a note on
>   kernel log notice when the helper is used. Edit example.
> - bpf_tail_call(): Improve comment on stack inheritance.
> - bpf_clone_redirect(): Improve description of BPF_F_INGRESS flag.
> 
> Cc: Alexei Starovoitov 
> Signed-off-by: Quentin Monnet 

Thanks for doing all this work, Quentin!

Just some small improvements while reading over it:

> ---
>  include/uapi/linux/bpf.h | 210 
> +++
>  1 file changed, 210 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 45f77f01e672..02b7d522b3c0 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -381,6 +381,216 @@ union bpf_attr {
>   * intentional, removing them would break paragraphs for rst2man.
>   *
>   * Start of BPF helper function descriptions:
> + *
> + * void *bpf_map_lookup_elem(struct bpf_map *map, void *key)

const void *key

> + *   Description
> + *   Perform a lookup in *map* for an entry associated to *key*.
> + *   Return
> + *   Map value associated to *key*, or **NULL** if no entry was
> + *   found.
> + *
> + * int bpf_map_update_elem(struct bpf_map *map, void *key, void *value, u64 
> flags)

const void *key, const void *value

> + *   Description
> + *   Add or update the value of the entry associated to *key* in
> + *   *map* with *value*. *flags* is one of:
> + *
> + *   **BPF_NOEXIST**
> + *   The entry for *key* must not exist in the map.
> + *   **BPF_EXIST**
> + *   The entry for *key* must already exist in the map.
> + *   **BPF_ANY**
> + *   No condition on the existence of the entry for *key*.
> + *
> + *   Flag value **BPF_NOEXIST** cannot be used for maps of types
> + *   **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY**  (all
> + *   elements always exist), the helper would return an error.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_map_delete_elem(struct bpf_map *map, void *key)

const void *key

> + *   Description
> + *   Delete entry with *key* from *map*.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_probe_read(void *dst, u32 size, const void *src)
> + *   Description
> + *   For tracing programs, safely attempt to read *size* bytes from
> + *   address *src* and store the data in *dst*.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * u64 bpf_ktime_get_ns(void)
> + *   Description
> + *   Return the time elapsed since system boot, in nanoseconds.
> + *   Return
> + *   Current *ktime*.
> + *
> + * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
> + *   Description
> + *   This helper is a "printk()-like" facility for debugging. It
> + *   prints a message defined by format *fmt* (of size *fmt_size*)
> + *   to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
> + *   available. It can take up to three additional **u64**
> + *   arguments (as an eBPF helpers, the total number of arguments is
> + *   limited to five).
> + *
> + *   Each time the helper is called, it appends a line to the trace.
> + *   The format of the trace is customizable, and the exact output
> + *   one will get depends on the options set in
> + *   *\/sys/kernel/debug/tracing/trace_options* (see also the
> + *   *README* file under the same directory). However, it usually
> + *   defaults to something like:
> + *
> + *   ::
> + *
> + *   telnet-470   [001] .N.. 419421.045894: 0x0001: 
> 
> + *
> + *   In the above:
> + *
> + *   * ``telnet`` is the name of the current task.
> + *

Re: [RFC v3 net-next 13/18] net/sched: Introduce the TBS Qdisc

2018-04-19 Thread Thomas Gleixner

On Wed, 11 Apr 2018, Jesus Sanchez-Palencia wrote:
> On 04/11/2018 01:16 PM, Thomas Gleixner wrote:
> >> So there is a "clockid" that can be used for the full hw offload modes. On 
> >> this
> >> case, the txtimes are in reference to the NIC's PTP clock, and, as 
> >> discussed, we
> >> can't just use a clockid that was computed from the fd pointing to 
> >> /dev/ptpX .
> > 
> > And the NICs PTP clock is CLOCK_TAI, so there should be no reason to have
> > yet another clock, right?
> 
> Just breaking this down a bit, yes, TAI is the network time base, and the NICs
> PTP clock use that because PTP is (commonly) based on TAI. After the PHCs have
> been synchronized over the network (e.g. with ptp4l), my understanding is that
> if applications want to use the clockid_t CLOCK_TAI as a network clock 
> reference
> it's required that something (i.e. phc2sys) is synchronizing the PHCs and the
> system clock, and also that something calls adjtime to apply the TAI vs UTC
> offset to CLOCK_TAI.
> 
> If we are fine with those 'dependencies', then I agree there is no need for
> another clock.
> 
> I was thinking about the full offload use-cases, thus when no scheduling is
> happening inside the qdiscs. Applications could just read the time from the 
> PHC
> clocks directly without having to rely on any of the above. On this case,
> userspace would use DYNAMIC_CLOCK just to flag that this is the case, but I 
> must
> admit it's not clear to me how common of a use-case that is, or even if it 
> makes
> sense.

I don't think it makes a lot of sense because the only use case for that is
a full user space scheduler which routes _ALL_ traffic. I don't think
that's something which we want to proliferate.

So I'd rather start off with the CLOCK_TAI assumption and if the need
really arises we can discuss that separately. So you can take a clockid
into account when designing the ABI, but have it CLOCK_TAI only for the
start.

Thanks,

tglx

Re: [PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Bhadram Varka


Hi Jisheng,

On 4/19/2018 2:39 PM, Jisheng Zhang wrote:

On Thu, 19 Apr 2018 09:00:40 + Bhadram Varka wrote:


Hi,


-Original Message-
From: Jisheng Zhang 
Sent: Thursday, April 19, 2018 2:24 PM
To: Bhadram Varka 
Cc: Andrew Lunn ; Florian Fainelli ;
David S. Miller ; netdev@vger.kernel.org; linux-
ker...@vger.kernel.org; Jingju Hou 
Subject: Re: [PATCH] net: phy: marvell: clear wol event before setting it

Hi,

On Thu, 19 Apr 2018 08:38:45 + Bhadram Varka wrote:
   

Hi,
  

-Original Message-
From: netdev-ow...@vger.kernel.org  On
Behalf Of Jisheng Zhang
Sent: Thursday, April 19, 2018 1:33 PM
To: Andrew Lunn ; Florian Fainelli
; David S. Miller 
Cc: netdev@vger.kernel.org; linux-ker...@vger.kernel.org; Jingju Hou

Subject: [PATCH] net: phy: marvell: clear wol event before setting
it

From: Jingju Hou 

If WOL event happened once, the LED[2] interrupt pin will not be
cleared unless reading the CSISR register. So clear the WOL event before

enabling it.

Signed-off-by: Jingju Hou 
Signed-off-by: Jisheng Zhang 
---
  drivers/net/phy/marvell.c | 9 +
  1 file changed, 9 insertions(+)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index c22e8e383247..b6abe1cbc84b 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -115,6 +115,9 @@
  /* WOL Event Interrupt Enable */
  #define MII_88E1318S_PHY_CSIER_WOL_EIEBIT(7)

+/* Copper Specific Interrupt Status Register */
+#define MII_88E1318S_PHY_CSISR 0x13
+

There is already macro to represent this register - MII_M1011_IEVENT. Do we

need this macro ?

Good point. Will use MII_M1011_IEVENT instead in v2.
   
  

  /* LED Timer Control Register */
  #define MII_88E1318S_PHY_LED_TCR  0x12
  #define MII_88E1318S_PHY_LED_TCR_FORCE_INTBIT(15)
@@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device
*phydev,
if (err < 0)
goto error;

+   /* If WOL event happened once, the LED[2] interrupt pin
+* will not be cleared unless reading the CSISR register.
+* So clear the WOL event first before enabling it.
+*/
+   phy_read(phydev, MII_88E1318S_PHY_CSISR);

This part of the operation already taken care by ack_interrupt and
did_interrupt [] .ack_interrupt = &marvell_ack_interrupt,
.did_interrupt = &m88e1121_did_interrupt, [...]

If at all WOL event occurred marvell_ack_interrupt will take care of clearing 
the

interrupt status register.

Am I missing anything here ?

If there's no valid irq for phy, the ack_interrupt/did_interrupt won't be 
called.

Which means that the PHY is not having Interrupt pin ?

No valid irq doesn't mean "not having interrupt pin". they are different
Okay. If there is WoL event through magic packet then its valid irq for 
the PHY right.
Then phy_interrupt will be called from there ack/did_interrupts will be 
called. So it clears WoL interrupt.



Generally through PHY interrupt will wake up the system right. If there is no 
interrupt pin then how the system will wake up the from suspend for the magic 
packet.?


IIRC, the phy irq isn't necessary for WOL. The phy interrupt pin isn't
necessarily taken as "interrupt"
Please correct me if I am wrong. In this case how the system will wake 
up from the SC7.There has to be wake capable irq/gpio pin to do this 
operation.


Thanks,
Bhadram.

[PATCH iproute2-next 1/2] man: ip link: document GRE tunnels

2018-04-19 Thread Sabrina Dubroca

GRE tunnels are currently only documented together with IPIP and SIT
tunnels, but they actually have very different configuration
options. Let's separate them.

Signed-off-by: Sabrina Dubroca 
---
 man/man8/ip-link.8.in | 152 --
 1 file changed, 148 insertions(+), 4 deletions(-)

diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index 5dee9fcd627a..77ab8a3b9723 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -693,13 +693,13 @@ tunnel.
 .in -8
 
 .TP
-GRE, IPIP, SIT Type Support
-For a link of types
-.I GRE/IPIP/SIT
+IPIP, SIT Type Support
+For a link of type
+.IR IPIP or SIT
 the following additional arguments are supported:
 
 .BI "ip link add " DEVICE
-.BR type " { " gre " | " ipip " | " sit " }"
+.BR type " { " ipip " | " sit " }"
 .BI " remote " ADDR " local " ADDR
 [
 .BR encap " { " fou " | " gue " | " none " }"
@@ -764,6 +764,150 @@ IPv6-Over-IPv4 is not supported for IPIP.
 - make this tunnel externally controlled
 .RB "(e.g. " "ip route encap" ).
 
+.in -8
+.TP
+GRE Type Support
+For a link of type
+.IR GRE " or " GRETAP
+the following additional arguments are supported:
+
+.BI "ip link add " DEVICE
+.BR type " { " gre " | " gretap " }"
+.BI " remote " ADDR " local " ADDR
+[
+.RB [ i | o ] seq
+] [
+.RB [ i | o ] key
+.I KEY
+] [
+.RB [ i | o ] csum
+] [
+.BI ttl " TTL "
+] [
+.BI tos " TOS "
+] [
+.RB [ no ] pmtudisc
+] [
+.RB [ no ] ignore-df
+] [
+.BI dev " PHYS_DEV "
+] [
+.BR encap " { " fou " | " gue " | " none " }"
+] [
+.BR encap-sport " { " \fIPORT " | " auto " }"
+] [
+.BI "encap-dport " PORT
+] [
+.RB [ no ] encap-csum
+] [
+.RB [ no ] encap-remcsum
+] [
+.BR external
+]
+
+.in +8
+.sp
+.BI  remote " ADDR "
+- specifies the remote address of the tunnel.
+
+.sp
+.BI  local " ADDR "
+- specifies the fixed local address for tunneled packets.
+It must be an address on another interface on this host.
+
+.sp
+.RB [ i | o ] seq
+- serialize packets.
+The
+.B oseq
+flag enables sequencing of outgoing packets.
+The
+.B iseq
+flag requires that all input packets are serialized.
+
+.sp
+.RB [ i | o ] key
+.I KEY
+- use keyed GRE with key
+.IR KEY ". "KEY
+is either a number or an IPv4 address-like dotted quad.
+The
+.B key
+parameter specifies the same key to use in both directions.
+The
+.BR ikey " and " okey
+parameters specify different keys for input and output.
+
+.sp
+.RB  [ i | o ] csum
+- generate/require checksums for tunneled packets.
+The
+.B ocsum
+flag calculates checksums for outgoing packets.
+The
+.B icsum
+flag requires that all input packets have the correct
+checksum. The
+.B csum
+flag is equivalent to the combination
+.B "icsum ocsum" .
+
+.sp
+.BI ttl " TTL"
+- specifies the TTL value to use in outgoing packets.
+
+.sp
+.BI tos " TOS"
+- specifies the TOS value to use in outgoing packets.
+
+.sp
+.RB [ no ] pmtudisc
+- enables/disables Path MTU Discovery on this tunnel.
+It is enabled by default. Note that a fixed ttl is incompatible
+with this option: tunneling with a fixed ttl always makes pmtu
+discovery.
+
+.sp
+.RB [ no ] ignore-df
+- enables/disables IPv4 DF suppression on this tunnel.
+Normally datagrams that exceed the MTU will be fragmented; the presence
+of the DF flag inhibits this, resulting instead in an ICMP Unreachable
+(Fragmentation Required) message.  Enabling this attribute causes the
+DF flag to be ignored.
+
+.sp
+.BI dev " PHYS_DEV"
+- specifies the physical device to use for tunnel endpoint communication.
+
+.sp
+.BR encap " { " fou " | " gue " | " none " }"
+- specifies type of secondary UDP encapsulation. "fou" indicates
+Foo-Over-UDP, "gue" indicates Generic UDP Encapsulation.
+
+.sp
+.BR encap-sport " { " \fIPORT " | " auto " }"
+- specifies the source port in UDP encapsulation.
+.IR PORT
+indicates the port by number, "auto"
+indicates that the port number should be chosen automatically
+(the kernel picks a flow based on the flow hash of the
+encapsulated packet).
+
+.sp
+.RB [ no ] encap-csum
+- specifies if UDP checksums are enabled in the secondary
+encapsulation.
+
+.sp
+.RB [ no ] encap-remcsum
+- specifies if Remote Checksum Offload is enabled. This is only
+applicable for Generic UDP Encapsulation.
+
+.sp
+.BR external
+- make this tunnel externally controlled
+.RB "(e.g. " "ip route encap" ).
+
 .in -8
 
 .TP
-- 
2.17.0

[PATCH iproute2-next 2/2] gre/gre6: allow clearing {,i,o}{key,seq,csum} flags

2018-04-19 Thread Sabrina Dubroca

Currently, iproute allows setting those flags, but it's impossible to
clear them, since their current value is fetched from the kernel and
then we OR in the additional flags passed on the command line.

Add no* variants to allow clearing them.

Signed-off-by: Sabrina Dubroca 
---
 ip/link_gre.c | 27 ---
 ip/link_gre6.c| 27 ---
 man/man8/ip-link.8.in | 27 ++-
 3 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/ip/link_gre.c b/ip/link_gre.c
index bc1cee8fbca2..4c8849f7051d 100644
--- a/ip/link_gre.c
+++ b/ip/link_gre.c
@@ -31,9 +31,9 @@ static void gre_print_help(struct link_util *lu, int argc, 
char **argv, FILE *f)
);
fprintf(f,
" [ local ADDR ]\n"
-   " [ [i|o]seq ]\n"
-   " [ [i|o]key KEY ]\n"
-   " [ [i|o]csum ]\n"
+   " [ [no][i|o]seq ]\n"
+   " [ [i|o]key KEY | no[i|o]key ]\n"
+   " [ [no][i|o]csum ]\n"
" [ ttl TTL ]\n"
" [ tos TOS ]\n"
" [ [no]pmtudisc ]\n"
@@ -210,28 +210,49 @@ get_failed:
iflags |= GRE_KEY;
oflags |= GRE_KEY;
ikey = okey = tnl_parse_key("key", *argv);
+   } else if (!matches(*argv, "nokey")) {
+   iflags &= ~GRE_KEY;
+   oflags &= ~GRE_KEY;
} else if (!matches(*argv, "ikey")) {
NEXT_ARG();
iflags |= GRE_KEY;
ikey = tnl_parse_key("ikey", *argv);
+   } else if (!matches(*argv, "noikey")) {
+   iflags &= ~GRE_KEY;
} else if (!matches(*argv, "okey")) {
NEXT_ARG();
oflags |= GRE_KEY;
okey = tnl_parse_key("okey", *argv);
+   } else if (!matches(*argv, "noikey")) {
+   iflags &= ~GRE_KEY;
} else if (!matches(*argv, "seq")) {
iflags |= GRE_SEQ;
oflags |= GRE_SEQ;
+   } else if (!matches(*argv, "noseq")) {
+   iflags &= ~GRE_SEQ;
+   oflags &= ~GRE_SEQ;
} else if (!matches(*argv, "iseq")) {
iflags |= GRE_SEQ;
+   } else if (!matches(*argv, "noiseq")) {
+   iflags &= ~GRE_SEQ;
} else if (!matches(*argv, "oseq")) {
oflags |= GRE_SEQ;
+   } else if (!matches(*argv, "nooseq")) {
+   oflags &= ~GRE_SEQ;
} else if (!matches(*argv, "csum")) {
iflags |= GRE_CSUM;
oflags |= GRE_CSUM;
+   } else if (!matches(*argv, "nocsum")) {
+   iflags &= ~GRE_CSUM;
+   oflags &= ~GRE_CSUM;
} else if (!matches(*argv, "icsum")) {
iflags |= GRE_CSUM;
+   } else if (!matches(*argv, "noicsum")) {
+   iflags &= ~GRE_CSUM;
} else if (!matches(*argv, "ocsum")) {
oflags |= GRE_CSUM;
+   } else if (!matches(*argv, "noocsum")) {
+   oflags &= ~GRE_CSUM;
} else if (!matches(*argv, "nopmtudisc")) {
pmtudisc = 0;
} else if (!matches(*argv, "pmtudisc")) {
diff --git a/ip/link_gre6.c b/ip/link_gre6.c
index a6fe0b73d235..542da0c3ccc9 100644
--- a/ip/link_gre6.c
+++ b/ip/link_gre6.c
@@ -38,9 +38,9 @@ static void gre_print_help(struct link_util *lu, int argc, 
char **argv, FILE *f)
);
fprintf(f,
" [ local ADDR ]\n"
-   " [ [i|o]seq ]\n"
-   " [ [i|o]key KEY ]\n"
-   " [ [i|o]csum ]\n"
+   " [ [no][i|o]seq ]\n"
+   " [ [i|o]key KEY | no[i|o]key ]\n"
+   " [ [no][i|o]csum ]\n"
" [ hoplimit TTL ]\n"
" [ encaplimit ELIM ]\n"
" [ tclass TCLASS ]\n"
@@ -220,28 +220,49 @@ get_failed:
iflags |= GRE_KEY;
oflags |= GRE_KEY;
ikey = okey = tnl_parse_key("key", *argv);
+   } else if (!matches(*argv, "nokey")) {
+   iflags &= ~GRE_KEY;
+   oflags &= ~GRE_KEY;
} else if (!matches(*argv, "ikey")) {

[PATCH net-next v2] team: account for oper state

2018-04-19 Thread George Wilkie

Account for operational state when determining port linkup state,
as per Documentation/networking/operstates.txt.

Signed-off-by: George Wilkie 
---
 drivers/net/team/team.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a6c6ce19..ed4d109f40f3 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2918,7 +2918,7 @@ static int team_device_event(struct notifier_block 
*unused,
case NETDEV_CHANGE:
if (netif_running(port->dev))
team_port_change_check(port,
-  !!netif_carrier_ok(port->dev));
+  !!(netif_oper_up(port->dev)));
break;
case NETDEV_UNREGISTER:
team_del_slave(port->team->dev, dev);
-- 
2.11.0

Re: [PATCH net-next v2] team: account for oper state

2018-04-19 Thread George Wilkie

On Thu, Apr 19, 2018 at 11:24:15AM +0100, George Wilkie wrote:
> Account for operational state when determining port linkup state,
> as per Documentation/networking/operstates.txt.
> 
> Signed-off-by: George Wilkie 
> ---
>  drivers/net/team/team.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
> index a6c6ce19..ed4d109f40f3 100644
> --- a/drivers/net/team/team.c
> +++ b/drivers/net/team/team.c
> @@ -2918,7 +2918,7 @@ static int team_device_event(struct notifier_block 
> *unused,
>   case NETDEV_CHANGE:
>   if (netif_running(port->dev))
>   team_port_change_check(port,
> -!!netif_carrier_ok(port->dev));
> +!!(netif_oper_up(port->dev)));

Bah, forgot to remove the extra ().

>   break;
>   case NETDEV_UNREGISTER:
>   team_del_slave(port->team->dev, dev);
> -- 
> 2.11.0
> 
>

Re: [PATCH bpf-next v3 3/8] bpf: add documentation for eBPF helpers (12-22)

2018-04-19 Thread Daniel Borkmann

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions, all
> written by Alexei:
> 
> - bpf_get_current_pid_tgid()
> - bpf_get_current_uid_gid()
> - bpf_get_current_comm()
> - bpf_skb_vlan_push()
> - bpf_skb_vlan_pop()
> - bpf_skb_get_tunnel_key()
> - bpf_skb_set_tunnel_key()
> - bpf_redirect()
> - bpf_perf_event_output()
> - bpf_get_stackid()
> - bpf_get_current_task()
> 
> v3:
> - bpf_skb_get_tunnel_key(): Change and improve description and example.
> - bpf_redirect(): Improve description of BPF_F_INGRESS flag.
> - bpf_perf_event_output(): Fix first sentence of description. Delete
>   wrong statement on context being evaluated as a struct pt_reg. Remove
>   the long yet incomplete example.
> - bpf_get_stackid(): Add a note about PERF_MAX_STACK_DEPTH being
>   configurable.
> 
> Cc: Alexei Starovoitov 
> Signed-off-by: Quentin Monnet 
> ---
>  include/uapi/linux/bpf.h | 225 
> +++
>  1 file changed, 225 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 02b7d522b3c0..c59bf5b28164 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -591,6 +591,231 @@ union bpf_attr {
>   *   performed again.
>   *   Return
>   *   0 on success, or a negative error in case of failure.
> + *
> + * u64 bpf_get_current_pid_tgid(void)
> + *   Return
> + *   A 64-bit integer containing the current tgid and pid, and
> + *   created as such:
> + *   *current_task*\ **->tgid << 32 \|**
> + *   *current_task*\ **->pid**.
> + *
> + * u64 bpf_get_current_uid_gid(void)
> + *   Return
> + *   A 64-bit integer containing the current GID and UID, and
> + *   created as such: *current_gid* **<< 32 \|** *current_uid*.
> + *
> + * int bpf_get_current_comm(char *buf, u32 size_of_buf)
> + *   Description
> + *   Copy the **comm** attribute of the current task into *buf* of
> + *   *size_of_buf*. The **comm** attribute contains the name of
> + *   the executable (excluding the path) for the current task. The
> + *   *size_of_buf* must be strictly positive. On success, the
> + *   helper makes sure that the *buf* is NUL-terminated. On failure,
> + *   it is filled with zeroes.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 
> vlan_tci)
> + *   Description
> + *   Push a *vlan_tci* (VLAN tag control information) of protocol
> + *   *vlan_proto* to the packet associated to *skb*, then update
> + *   the checksum. Note that if *vlan_proto* is different from
> + *   **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
> + *   be **ETH_P_8021Q**.
> + *
> + *   A call to this helper is susceptible to change data from the
> + *   packet. Therefore, at load time, all checks on pointers
> + *   previously done by the verifier are invalidated and must be
> + *   performed again.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_skb_vlan_pop(struct sk_buff *skb)
> + *   Description
> + *   Pop a VLAN header from the packet associated to *skb*.
> + *
> + *   A call to this helper is susceptible to change data from the
> + *   packet. Therefore, at load time, all checks on pointers
> + *   previously done by the verifier are invalidated and must be
> + *   performed again.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key 
> *key, u32 size, u64 flags)
> + *   Description
> + *   Get tunnel metadata. This helper takes a pointer *key* to an
> + *   empty **struct bpf_tunnel_key** of **size**, that will be
> + *   filled with tunnel metadata for the packet associated to *skb*.
> + *   The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
> + *   indicates that the tunnel is based on IPv6 protocol instead of
> + *   IPv4.
> + *
> + *   The **struct bpf_tunnel_key** is an object that generalizes the
> + *   principal parameters used by various tunneling protocols into a
> + *   single struct. This way, it can be used to easily make a
> + *   decision based on the contents of the encapsulation header,
> + *

[PATCH net-next v3] team: account for oper state

2018-04-19 Thread George Wilkie

Account for operational state when determining port linkup state,
as per Documentation/networking/operstates.txt.

Signed-off-by: George Wilkie 
---
 drivers/net/team/team.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a6c6ce19..8a8611095ca0 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2918,7 +2918,7 @@ static int team_device_event(struct notifier_block 
*unused,
case NETDEV_CHANGE:
if (netif_running(port->dev))
team_port_change_check(port,
-  !!netif_carrier_ok(port->dev));
+  !!netif_oper_up(port->dev));
break;
case NETDEV_UNREGISTER:
team_del_slave(port->team->dev, dev);
-- 
2.11.0

Re: [PATCH iproute2-next 2/2] gre/gre6: allow clearing {,i,o}{key,seq,csum} flags

2018-04-19 Thread Sabrina Dubroca

2018-04-19, 12:22:42 +0200, Sabrina Dubroca wrote:
> @@ -210,28 +210,49 @@ get_failed:
>   iflags |= GRE_KEY;
>   oflags |= GRE_KEY;
>   ikey = okey = tnl_parse_key("key", *argv);
> + } else if (!matches(*argv, "nokey")) {
> + iflags &= ~GRE_KEY;
> + oflags &= ~GRE_KEY;
>   } else if (!matches(*argv, "ikey")) {
>   NEXT_ARG();
>   iflags |= GRE_KEY;
>   ikey = tnl_parse_key("ikey", *argv);
> + } else if (!matches(*argv, "noikey")) {
> + iflags &= ~GRE_KEY;
>   } else if (!matches(*argv, "okey")) {
>   NEXT_ARG();
>   oflags |= GRE_KEY;
>   okey = tnl_parse_key("okey", *argv);
> + } else if (!matches(*argv, "noikey")) {
> + iflags &= ~GRE_KEY;

Sorry, posted the wrong version. I'll send v2 after I've had a bucket
of coffee.

-- 
Sabrina

[PATCH net 4/6] s390/qeth: fix MAC address update sequence

2018-04-19 Thread Julian Wiedmann

From: Julian Wiedmann 

When changing the MAC address on a L2 qeth device, current code first
unregisters the old address, then registers the new one.
If HW rejects the new address (or the IO fails), the device ends up with
no operable address at all.

Re-order the code flow so that the old address only gets dropped if the
new address was registered successfully. While at it, add logic to catch
some corner-cases.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_l2_main.c | 55 +++--
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 50a313806dde..830ca56a62e5 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -122,13 +122,10 @@ static int qeth_l2_send_setmac(struct qeth_card *card, 
__u8 *mac)
QETH_CARD_TEXT(card, 2, "L2Setmac");
rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_SETVMAC);
if (rc == 0) {
-   card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
-   ether_addr_copy(card->dev->dev_addr, mac);
dev_info(&card->gdev->dev,
-   "MAC address %pM successfully registered on device 
%s\n",
-   card->dev->dev_addr, card->dev->name);
+"MAC address %pM successfully registered on device 
%s\n",
+mac, card->dev->name);
} else {
-   card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED;
switch (rc) {
case -EEXIST:
dev_warn(&card->gdev->dev,
@@ -143,19 +140,6 @@ static int qeth_l2_send_setmac(struct qeth_card *card, 
__u8 *mac)
return rc;
 }
 
-static int qeth_l2_send_delmac(struct qeth_card *card, __u8 *mac)
-{
-   int rc;
-
-   QETH_CARD_TEXT(card, 2, "L2Delmac");
-   if (!(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
-   return 0;
-   rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_DELVMAC);
-   if (rc == 0)
-   card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED;
-   return rc;
-}
-
 static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
 {
enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ?
@@ -520,6 +504,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, 
void *p)
 {
struct sockaddr *addr = p;
struct qeth_card *card = dev->ml_priv;
+   u8 old_addr[ETH_ALEN];
int rc = 0;
 
QETH_CARD_TEXT(card, 3, "setmac");
@@ -531,14 +516,35 @@ static int qeth_l2_set_mac_address(struct net_device 
*dev, void *p)
return -EOPNOTSUPP;
}
QETH_CARD_HEX(card, 3, addr->sa_data, ETH_ALEN);
+   if (!is_valid_ether_addr(addr->sa_data))
+   return -EADDRNOTAVAIL;
+
if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
QETH_CARD_TEXT(card, 3, "setmcREC");
return -ERESTARTSYS;
}
-   rc = qeth_l2_send_delmac(card, &card->dev->dev_addr[0]);
-   if (!rc || (rc == -ENOENT))
-   rc = qeth_l2_send_setmac(card, addr->sa_data);
-   return rc ? -EINVAL : 0;
+
+   if (!qeth_card_hw_is_reachable(card)) {
+   ether_addr_copy(dev->dev_addr, addr->sa_data);
+   return 0;
+   }
+
+   /* don't register the same address twice */
+   if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) &&
+   (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
+   return 0;
+
+   /* add the new address, switch over, drop the old */
+   rc = qeth_l2_send_setmac(card, addr->sa_data);
+   if (rc)
+   return rc;
+   ether_addr_copy(old_addr, dev->dev_addr);
+   ether_addr_copy(dev->dev_addr, addr->sa_data);
+
+   if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)
+   qeth_l2_remove_mac(card, old_addr);
+   card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
+   return 0;
 }
 
 static void qeth_promisc_to_bridge(struct qeth_card *card)
@@ -1068,8 +1074,9 @@ static int __qeth_l2_set_online(struct ccwgroup_device 
*gdev, int recovery_mode)
goto out_remove;
}
 
-   if (card->info.type != QETH_CARD_TYPE_OSN)
-   qeth_l2_send_setmac(card, &card->dev->dev_addr[0]);
+   if (card->info.type != QETH_CARD_TYPE_OSN &&
+   !qeth_l2_send_setmac(card, card->dev->dev_addr))
+   card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
 
if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) {
if (card->info.hwtrap &&
-- 
2.13.5

[PATCH net 3/6] s390/qeth: handle failure on workqueue creation

2018-04-19 Thread Julian Wiedmann

Creating the global workqueue during driver init may fail, deal with it.
Also, destroy the created workqueue on any subsequent error.

Fixes: 0f54761d167f ("qeth: Support VEPA mode")
Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core_main.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 5ec47c6ebaa6..9a08b545d018 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -6540,10 +6540,14 @@ static int __init qeth_core_init(void)
mutex_init(&qeth_mod_mutex);
 
qeth_wq = create_singlethread_workqueue("qeth_wq");
+   if (!qeth_wq) {
+   rc = -ENOMEM;
+   goto out_err;
+   }
 
rc = qeth_register_dbf_views();
if (rc)
-   goto out_err;
+   goto dbf_err;
qeth_core_root_dev = root_device_register("qeth");
rc = PTR_ERR_OR_ZERO(qeth_core_root_dev);
if (rc)
@@ -6580,6 +6584,8 @@ static int __init qeth_core_init(void)
root_device_unregister(qeth_core_root_dev);
 register_err:
qeth_unregister_dbf_views();
+dbf_err:
+   destroy_workqueue(qeth_wq);
 out_err:
pr_err("Initializing the qeth device driver failed\n");
return rc;
-- 
2.13.5

[PATCH net 0/6] s390/qeth: fixes 2018-04-19

2018-04-19 Thread Julian Wiedmann

Hi Dave,

new mail address, same old me.

Please apply the following qeth fixes for 4.17. The common theme
seems to be error handling improvements in various areas of cmd IO.

Patches 1-3 should also go back to stable.

Thank you,
Julian


Julian Wiedmann (6):
  s390/qeth: fix error handling in adapter command callbacks
  s390/qeth: avoid control IO completion stalls
  s390/qeth: handle failure on workqueue creation
  s390/qeth: fix MAC address update sequence
  s390/qeth: fix request-side race during cmd IO timeout
  s390/qeth: use Read device to query hypervisor for MAC

 drivers/s390/net/qeth_core.h  |   2 -
 drivers/s390/net/qeth_core_main.c | 158 +-
 drivers/s390/net/qeth_core_mpc.h  |  12 +++
 drivers/s390/net/qeth_l2_main.c   |  59 +++---
 4 files changed, 116 insertions(+), 115 deletions(-)

-- 
2.13.5

[PATCH net 1/6] s390/qeth: fix error handling in adapter command callbacks

2018-04-19 Thread Julian Wiedmann

From: Julian Wiedmann 

Make sure to check both return code fields before(!) processing the
command response. Otherwise we risk operating on invalid data.

This matches an earlier fix for SETASSPARMS commands, see
commit ad3cbf613329 ("s390/qeth: fix error handling in checksum cmd callback").

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core_main.c | 85 +--
 1 file changed, 37 insertions(+), 48 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 04fefa5bb08d..36bc94088de1 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3033,28 +3033,23 @@ static int qeth_send_startlan(struct qeth_card *card)
return rc;
 }
 
-static int qeth_default_setadapterparms_cb(struct qeth_card *card,
-   struct qeth_reply *reply, unsigned long data)
+static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd)
 {
-   struct qeth_ipa_cmd *cmd;
-
-   QETH_CARD_TEXT(card, 4, "defadpcb");
-
-   cmd = (struct qeth_ipa_cmd *) data;
-   if (cmd->hdr.return_code == 0)
+   if (!cmd->hdr.return_code)
cmd->hdr.return_code =
cmd->data.setadapterparms.hdr.return_code;
-   return 0;
+   return cmd->hdr.return_code;
 }
 
 static int qeth_query_setadapterparms_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
 {
-   struct qeth_ipa_cmd *cmd;
+   struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
 
QETH_CARD_TEXT(card, 3, "quyadpcb");
+   if (qeth_setadpparms_inspect_rc(cmd))
+   return 0;
 
-   cmd = (struct qeth_ipa_cmd *) data;
if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) {
card->info.link_type =
  cmd->data.setadapterparms.data.query_cmds_supp.lan_type;
@@ -3062,7 +3057,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card 
*card,
}
card->options.adp.supported_funcs =
cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds;
-   return qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd);
+   return 0;
 }
 
 static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
@@ -3154,22 +3149,20 @@ EXPORT_SYMBOL_GPL(qeth_query_ipassists);
 static int qeth_query_switch_attributes_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
 {
-   struct qeth_ipa_cmd *cmd;
-   struct qeth_switch_info *sw_info;
+   struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
struct qeth_query_switch_attributes *attrs;
+   struct qeth_switch_info *sw_info;
 
QETH_CARD_TEXT(card, 2, "qswiatcb");
-   cmd = (struct qeth_ipa_cmd *) data;
-   sw_info = (struct qeth_switch_info *)reply->param;
-   if (cmd->data.setadapterparms.hdr.return_code == 0) {
-   attrs = &cmd->data.setadapterparms.data.query_switch_attributes;
-   sw_info->capabilities = attrs->capabilities;
-   sw_info->settings = attrs->settings;
-   QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities,
-   sw_info->settings);
-   }
-   qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd);
+   if (qeth_setadpparms_inspect_rc(cmd))
+   return 0;
 
+   sw_info = (struct qeth_switch_info *)reply->param;
+   attrs = &cmd->data.setadapterparms.data.query_switch_attributes;
+   sw_info->capabilities = attrs->capabilities;
+   sw_info->settings = attrs->settings;
+   QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities,
+   sw_info->settings);
return 0;
 }
 
@@ -4207,16 +4200,13 @@ EXPORT_SYMBOL_GPL(qeth_do_send_packet);
 static int qeth_setadp_promisc_mode_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
 {
-   struct qeth_ipa_cmd *cmd;
+   struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
struct qeth_ipacmd_setadpparms *setparms;
 
QETH_CARD_TEXT(card, 4, "prmadpcb");
 
-   cmd = (struct qeth_ipa_cmd *) data;
setparms = &(cmd->data.setadapterparms);
-
-   qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd);
-   if (cmd->hdr.return_code) {
+   if (qeth_setadpparms_inspect_rc(cmd)) {
QETH_CARD_TEXT_(card, 4, "prmrc%x", cmd->hdr.return_code);
setparms->data.mode = SET_PROMISC_MODE_OFF;
}
@@ -4286,18 +4276,18 @@ EXPORT_SYMBOL_GPL(qeth_get_stats);
 static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
 {
-   struct qeth_ipa_cmd *cmd;
+   struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
 
QETH_CARD_TEXT(card, 4, "chgmaccb");
+   i

[PATCH net 2/6] s390/qeth: avoid control IO completion stalls

2018-04-19 Thread Julian Wiedmann

From: Julian Wiedmann 

For control IO, qeth currently tracks the index of the buffer that it
expects to complete the next IO on each qeth_channel. If the channel
presents an IRQ while this buffer has not yet completed, no completion
processing for _any_ completed buffer takes place.
So if the 'next buffer' is skipped for any sort of reason* (e.g. when it
is released due to error conditions, before the IO is started), the
buffer obviously won't switch to PROCESSED until it is eventually
allocated for a _different_ IO and completes.
Until this happens, all completion processing on that channel stalls
and pending requests possibly time out.

As a fix, remove the whole 'next buffer' logic and simply process any
IO buffer right when it completes. A channel will never have more than
one IO pending, so there's no risk of processing out-of-sequence.

*Note: currently just one location in the code really handles this problem,
   by advancing the 'next' index manually.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  |  2 --
 drivers/s390/net/qeth_core_main.c | 22 +-
 2 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 4326715dc13e..78b98b3e7efa 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -557,7 +557,6 @@ enum qeth_prot_versions {
 enum qeth_cmd_buffer_state {
BUF_STATE_FREE,
BUF_STATE_LOCKED,
-   BUF_STATE_PROCESSED,
 };
 
 enum qeth_cq {
@@ -601,7 +600,6 @@ struct qeth_channel {
struct qeth_cmd_buffer iob[QETH_CMD_BUFFER_NO];
atomic_t irq_pending;
int io_buf_no;
-   int buf_no;
 };
 
 /**
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 36bc94088de1..5ec47c6ebaa6 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -818,7 +818,6 @@ void qeth_clear_cmd_buffers(struct qeth_channel *channel)
 
for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++)
qeth_release_buffer(channel, &channel->iob[cnt]);
-   channel->buf_no = 0;
channel->io_buf_no = 0;
 }
 EXPORT_SYMBOL_GPL(qeth_clear_cmd_buffers);
@@ -924,7 +923,6 @@ static int qeth_setup_channel(struct qeth_channel *channel)
kfree(channel->iob[cnt].data);
return -ENOMEM;
}
-   channel->buf_no = 0;
channel->io_buf_no = 0;
atomic_set(&channel->irq_pending, 0);
spin_lock_init(&channel->iob_lock);
@@ -1100,11 +1098,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned 
long intparm,
 {
int rc;
int cstat, dstat;
-   struct qeth_cmd_buffer *buffer;
struct qeth_channel *channel;
struct qeth_card *card;
struct qeth_cmd_buffer *iob;
-   __u8 index;
 
if (__qeth_check_irb_error(cdev, intparm, irb))
return;
@@ -1182,25 +1178,18 @@ static void qeth_irq(struct ccw_device *cdev, unsigned 
long intparm,
channel->state = CH_STATE_RCD_DONE;
goto out;
}
-   if (intparm) {
-   buffer = (struct qeth_cmd_buffer *) __va((addr_t)intparm);
-   buffer->state = BUF_STATE_PROCESSED;
-   }
if (channel == &card->data)
return;
if (channel == &card->read &&
channel->state == CH_STATE_UP)
__qeth_issue_next_read(card);
 
-   iob = channel->iob;
-   index = channel->buf_no;
-   while (iob[index].state == BUF_STATE_PROCESSED) {
-   if (iob[index].callback != NULL)
-   iob[index].callback(channel, iob + index);
-
-   index = (index + 1) % QETH_CMD_BUFFER_NO;
+   if (intparm) {
+   iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm);
+   if (iob->callback)
+   iob->callback(iob->channel, iob);
}
-   channel->buf_no = index;
+
 out:
wake_up(&card->wait_q);
return;
@@ -2214,7 +2203,6 @@ int qeth_send_control_data(struct qeth_card *card, int 
len,
 error:
atomic_set(&card->write.irq_pending, 0);
qeth_release_buffer(iob->channel, iob);
-   card->write.buf_no = (card->write.buf_no + 1) % QETH_CMD_BUFFER_NO;
rc = reply->rc;
qeth_put_reply(reply);
return rc;
-- 
2.13.5

[PATCH net 5/6] s390/qeth: fix request-side race during cmd IO timeout

2018-04-19 Thread Julian Wiedmann

From: Julian Wiedmann 

Submitting a cmd IO request (usually on the WRITE device, but for IDX
also on the READ device) is currently done with ccw_device_start()
and a manual timeout in the caller.
On timeout, the caller cleans up the related resources (e.g. IO buffer).
But 1) the IO might still be active and utilize those resources, and
2) when the IO completes, qeth_irq() will attempt to clean up the
   same resources again.

Instead of introducing additional resource locking, switch to
ccw_device_start_timeout() to ensure IO termination after timeout, and
let the IRQ handler alone deal with cleaning up after a request.

This also removes a stray write->irq_pending reset from
clear_ipacmd_list(). The routine doesn't terminate any pending IO on
the WRITE device, so this should be handled properly via IO timeout
in the IRQ handler.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core_main.c | 51 ---
 drivers/s390/net/qeth_core_mpc.h  | 12 +
 drivers/s390/net/qeth_l2_main.c   |  4 +--
 3 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 9a08b545d018..9b22d5d496ae 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -706,7 +706,6 @@ void qeth_clear_ipacmd_list(struct qeth_card *card)
qeth_put_reply(reply);
}
spin_unlock_irqrestore(&card->lock, flags);
-   atomic_set(&card->write.irq_pending, 0);
 }
 EXPORT_SYMBOL_GPL(qeth_clear_ipacmd_list);
 
@@ -1098,14 +1097,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned 
long intparm,
 {
int rc;
int cstat, dstat;
+   struct qeth_cmd_buffer *iob = NULL;
struct qeth_channel *channel;
struct qeth_card *card;
-   struct qeth_cmd_buffer *iob;
-
-   if (__qeth_check_irb_error(cdev, intparm, irb))
-   return;
-   cstat = irb->scsw.cmd.cstat;
-   dstat = irb->scsw.cmd.dstat;
 
card = CARD_FROM_CDEV(cdev);
if (!card)
@@ -1123,6 +1117,19 @@ static void qeth_irq(struct ccw_device *cdev, unsigned 
long intparm,
channel = &card->data;
QETH_CARD_TEXT(card, 5, "data");
}
+
+   if (qeth_intparm_is_iob(intparm))
+   iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm);
+
+   if (__qeth_check_irb_error(cdev, intparm, irb)) {
+   /* IO was terminated, free its resources. */
+   if (iob)
+   qeth_release_buffer(iob->channel, iob);
+   atomic_set(&channel->irq_pending, 0);
+   wake_up(&card->wait_q);
+   return;
+   }
+
atomic_set(&channel->irq_pending, 0);
 
if (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC))
@@ -1146,6 +1153,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned 
long intparm,
/* we don't have to handle this further */
intparm = 0;
}
+
+   cstat = irb->scsw.cmd.cstat;
+   dstat = irb->scsw.cmd.dstat;
+
if ((dstat & DEV_STAT_UNIT_EXCEP) ||
(dstat & DEV_STAT_UNIT_CHECK) ||
(cstat)) {
@@ -1184,11 +1195,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned 
long intparm,
channel->state == CH_STATE_UP)
__qeth_issue_next_read(card);
 
-   if (intparm) {
-   iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm);
-   if (iob->callback)
-   iob->callback(iob->channel, iob);
-   }
+   if (iob && iob->callback)
+   iob->callback(iob->channel, iob);
 
 out:
wake_up(&card->wait_q);
@@ -1859,8 +1867,8 @@ static int qeth_idx_activate_get_answer(struct 
qeth_channel *channel,
   atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0);
QETH_DBF_TEXT(SETUP, 6, "noirqpnd");
spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
-   rc = ccw_device_start(channel->ccwdev,
- &channel->ccw, (addr_t) iob, 0, 0);
+   rc = ccw_device_start_timeout(channel->ccwdev, &channel->ccw,
+ (addr_t) iob, 0, 0, QETH_TIMEOUT);
spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags);
 
if (rc) {
@@ -1877,7 +1885,6 @@ static int qeth_idx_activate_get_answer(struct 
qeth_channel *channel,
if (channel->state != CH_STATE_UP) {
rc = -ETIME;
QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc);
-   qeth_clear_cmd_buffers(channel);
} else
rc = 0;
return rc;
@@ -1931,8 +1938,8 @@ static int qeth_idx_activate_channel(struct qeth_channel 
*channel,
   atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0);
QETH_DBF_TEXT(SETUP, 6, "noirqpnd");
spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
-   rc = ccw_device_start(channel->

[PATCH net 6/6] s390/qeth: use Read device to query hypervisor for MAC

2018-04-19 Thread Julian Wiedmann

From: Julian Wiedmann 

For z/VM NICs, qeth needs to consider which of the three CCW devices in
an MPC group it uses for requesting a managed MAC address.

On the Base device, the hypervisor returns a default MAC which is
pre-assigned when creating the NIC (this MAC is also returned by the
READ MAC primitive). Querying any other device results in the allocation
of an additional MAC address.

For consistency with READ MAC and to avoid using up more addresses than
necessary, it is preferable to use the NIC's default MAC. So switch the
diag26c over to using a NIC's Read device, which should always be
identical to the Base device.

Fixes: ec61bd2fd2a2 ("s390/qeth: use diag26c to get MAC address on L2")
Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 9b22d5d496ae..dffd820731f2 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4835,7 +4835,7 @@ int qeth_vm_request_mac(struct qeth_card *card)
goto out;
}
 
-   ccw_device_get_id(CARD_DDEV(card), &id);
+   ccw_device_get_id(CARD_RDEV(card), &id);
request->resp_buf_len = sizeof(*response);
request->resp_version = DIAG26C_VERSION2;
request->op_code = DIAG26C_GET_MAC;
-- 
2.13.5

Re: [PATCH] bpf, x86_32: add eBPF JIT compiler for ia32 (x86_32)

2018-04-19 Thread Thomas Gleixner

On Wed, 18 Apr 2018, Wang YanQing wrote:
> @@ -0,0 +1,147 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* bpf_jit.S : BPF JIT helper functions

Please do not add these file names to the top level comment. They provide
no value and just become stale when the file gets moved/renamed.

> + *
> + * Copyright (C) 2018 Wang YanQing (udkni...@gmail.com)
> + * Copyright (C) 2011 Eric Dumazet (eric.duma...@gmail.com)
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; version 2
> + * of the License.

You already have the License Identifier. So you don't need the boiler plate
text.

Thanks,

tglx

Re: [PATCH net-next v3] team: account for oper state

2018-04-19 Thread Jiri Pirko

Thu, Apr 19, 2018 at 12:34:14PM CEST, gwil...@vyatta.att-mail.com wrote:
>Account for operational state when determining port linkup state,
>as per Documentation/networking/operstates.txt.
>
>Signed-off-by: George Wilkie 

Acked-by: Jiri Pirko

Re: [PATCH bpf-next v3 4/8] bpf: add documentation for eBPF helpers (23-32)

2018-04-19 Thread Daniel Borkmann

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions, all
> written by Daniel:
> 
> - bpf_get_prandom_u32()
> - bpf_get_smp_processor_id()
> - bpf_get_cgroup_classid()
> - bpf_get_route_realm()
> - bpf_skb_load_bytes()
> - bpf_csum_diff()
> - bpf_skb_get_tunnel_opt()
> - bpf_skb_set_tunnel_opt()
> - bpf_skb_change_proto()
> - bpf_skb_change_type()
> 
> v3:
> - bpf_get_prandom_u32(): Fix helper name :(. Add description, including
>   a note on the internal random state.
> - bpf_get_smp_processor_id(): Add description, including a note on the
>   processor id remaining stable during program run.
> - bpf_get_cgroup_classid(): State that CONFIG_CGROUP_NET_CLASSID is
>   required to use the helper. Add a reference to related documentation.
>   State that placing a task in net_cls controller disables cgroup-bpf.
> - bpf_get_route_realm(): State that CONFIG_CGROUP_NET_CLASSID is
>   required to use this helper.
> - bpf_skb_load_bytes(): Fix comment on current use cases for the helper.
> 
> Cc: Daniel Borkmann 
> Signed-off-by: Quentin Monnet 
> ---
>  include/uapi/linux/bpf.h | 152 
> +++
>  1 file changed, 152 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c59bf5b28164..d748f65a8f58 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -483,6 +483,23 @@ union bpf_attr {
>   *   The number of bytes written to the buffer, or a negative error
>   *   in case of failure.
>   *
> + * u32 bpf_get_prandom_u32(void)
> + *   Description
> + *   Get a pseudo-random number. Note that this helper uses its own
> + *   pseudo-random internal state, and cannot be used to infer the
> + *   seed of other random functions in the kernel.

We should still add that this prng is not cryptographically secure.

> + *   Return
> + *   A random 32-bit unsigned value.
> + *
> + * u32 bpf_get_smp_processor_id(void)
> + *   Description
> + *   Get the SMP (Symmetric multiprocessing) processor id. Note that

Nit: s/Symmetric/symmetric/ ?

> + *   all programs run with preemption disabled, which means that the
> + *   SMP processor id is stable during all the execution of the
> + *   program.
> + *   Return
> + *   The SMP id of the processor running the program.
> + *
>   * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void 
> *from, u32 len, u64 flags)
>   *   Description
>   *   Store *len* bytes from address *from* into the packet
> @@ -615,6 +632,27 @@ union bpf_attr {
>   *   Return
>   *   0 on success, or a negative error in case of failure.
>   *
> + * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
> + *   Description
> + *   Retrieve the classid for the current task, i.e. for the
> + *   net_cls (network classifier) cgroup to which *skb* belongs.
> + *
> + *   This helper is only available if the kernel was compiled with
> + *   the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
> + *   "**y**" or to "**m**".
> + *
> + *   Note that placing a task into the net_cls controller completely
> + *   disables the execution of eBPF programs with the cgroup.

I'm not sure I follow the above sentence, what do you mean by that?

I would definitely also add here that this helper is limited to cgroups v1
only, and that it works on clsact TC egress hook but not the ingress one.

> + *   Also note that, in the above description, the "network
> + *   classifier" cgroup does not designate a generic classifier, but
> + *   a particular mechanism that provides an interface to tag
> + *   network packets with a specific class identifier. See also the

The "generic classifier" part is a bit strange to parse. I would probably
leave the first part out and explain that this provides a means to tag
packets based on a user-provided ID for all traffic coming from the tasks
belonging to the related cgroup.

> + *   related kernel documentation, available from the Linux sources
> + *   in file *Documentation/cgroup-v1/net_cls.txt*.
> + *   Return
> + *   The classid, or 0 for the default unconfigured classid.
> + *
>   * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 
> vlan_tci)
>   *   Description
>   *   Push a *vlan_tci* (VLAN tag control information) of protocol
> @@ -734,6 +772,16 @@ union bpf_attr {
>   *   are **TC_

Re: [PATCH 3/3] ath10k: Support ethtool gstats2 API.

2018-04-19 Thread kbuild test robot

Hi Ben,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on mac80211/master]
[also build test ERROR on v4.17-rc1 next-20180419]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/greearb-candelatech-com/ethtool-Support-ETHTOOL_GSTATS2-command/20180419-105301
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git 
master
config: x86_64-randconfig-ne0-04191514 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> drivers/net/wireless/ath/ath10k/mac.c:7706:21: error: 
>> 'ath10k_debug_get_et_stats2' undeclared here (not in a function)
 .get_et_stats2   = ath10k_debug_get_et_stats2,
^~

vim +/ath10k_debug_get_et_stats2 +7706 drivers/net/wireless/ath/ath10k/mac.c

  7672  
  7673  static const struct ieee80211_ops ath10k_ops = {
  7674  .tx = ath10k_mac_op_tx,
  7675  .wake_tx_queue  = ath10k_mac_op_wake_tx_queue,
  7676  .start  = ath10k_start,
  7677  .stop   = ath10k_stop,
  7678  .config = ath10k_config,
  7679  .add_interface  = ath10k_add_interface,
  7680  .remove_interface   = ath10k_remove_interface,
  7681  .configure_filter   = ath10k_configure_filter,
  7682  .bss_info_changed   = ath10k_bss_info_changed,
  7683  .set_coverage_class = 
ath10k_mac_op_set_coverage_class,
  7684  .hw_scan= ath10k_hw_scan,
  7685  .cancel_hw_scan = ath10k_cancel_hw_scan,
  7686  .set_key= ath10k_set_key,
  7687  .set_default_unicast_key= 
ath10k_set_default_unicast_key,
  7688  .sta_state  = ath10k_sta_state,
  7689  .conf_tx= ath10k_conf_tx,
  7690  .remain_on_channel  = ath10k_remain_on_channel,
  7691  .cancel_remain_on_channel   = 
ath10k_cancel_remain_on_channel,
  7692  .set_rts_threshold  = ath10k_set_rts_threshold,
  7693  .set_frag_threshold = 
ath10k_mac_op_set_frag_threshold,
  7694  .flush  = ath10k_flush,
  7695  .tx_last_beacon = ath10k_tx_last_beacon,
  7696  .set_antenna= ath10k_set_antenna,
  7697  .get_antenna= ath10k_get_antenna,
  7698  .reconfig_complete  = ath10k_reconfig_complete,
  7699  .get_survey = ath10k_get_survey,
  7700  .set_bitrate_mask   = 
ath10k_mac_op_set_bitrate_mask,
  7701  .sta_rc_update  = ath10k_sta_rc_update,
  7702  .offset_tsf = ath10k_offset_tsf,
  7703  .ampdu_action   = ath10k_ampdu_action,
  7704  .get_et_sset_count  = 
ath10k_debug_get_et_sset_count,
  7705  .get_et_stats   = ath10k_debug_get_et_stats,
> 7706  .get_et_stats2  = ath10k_debug_get_et_stats2,
  7707  .get_et_strings = ath10k_debug_get_et_strings,
  7708  .add_chanctx= ath10k_mac_op_add_chanctx,
  7709  .remove_chanctx = ath10k_mac_op_remove_chanctx,
  7710  .change_chanctx = ath10k_mac_op_change_chanctx,
  7711  .assign_vif_chanctx = 
ath10k_mac_op_assign_vif_chanctx,
  7712  .unassign_vif_chanctx   = 
ath10k_mac_op_unassign_vif_chanctx,
  7713  .switch_vif_chanctx = 
ath10k_mac_op_switch_vif_chanctx,
  7714  .sta_pre_rcu_remove = 
ath10k_mac_op_sta_pre_rcu_remove,
  7715  .sta_statistics = ath10k_sta_statistics,
  7716  
  7717  CFG80211_TESTMODE_CMD(ath10k_tm_cmd)
  7718  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

[PATCH] net: deal wrong skb and failure ret from __tcp_retransmit_skb

2018-04-19 Thread Liu, Changcheng

Hit the panic below because skb is NULL; WARN on the wrong skb first.
If __tcp_retransmit_skb returns a failure, e.g. -EAGAIN, no
further action is needed in tcp_retransmit_skb.

gdb vmlinux
Reading symbols from vmlinux...done.
(gdb) p &((struct tcp_skb_cb *) \
&(((struct sk_buff *)0)->cb[0]))->tcp_gso_segs
$1 = (u16 *) 0x30 

[ 9040.917533] BUG: unable to handle kernel NULL pointer dereference at 
0030
[ 9040.926279] IP: tcp_retransmit_skb+0x5c/0xc0
[ 9040.931043] PGD 0 P4D 0
[ 9040.933865] Oops:  [#1] PREEMPT SMP PTI
[ 9040.972151] RIP: 0010:tcp_retransmit_skb+0x5c/0xc0
[ 9040.977496] RSP: 0018:8802bec83e40 EFLAGS: 00010202
[ 9041.062527] Call Trace:
[ 9041.065250]  
[ 9041.067489]  tcp_retransmit_timer+0x481/0x820
[ 9041.077697]  tcp_write_timer_handler+0xe9/0x230
[ 9041.082751]  tcp_write_timer+0x75/0x80
[ 9041.086932]  call_timer_fn+0x29/0x150
[ 9041.091018]  run_timer_softirq+0x411/0x460
[ 9041.105017]  __do_softirq+0x115/0x311
[ 9041.109103]  irq_exit+0xb0/0xc0
[ 9041.112605]  smp_apic_timer_interrupt+0x67/0x140

Signed-off-by: Liu Changcheng 

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383cac0..545b9b3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2920,7 +2920,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff 
*skb, int segs)
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 {
struct tcp_sock *tp = tcp_sk(sk);
-   int err = __tcp_retransmit_skb(sk, skb, segs);
+   int err = 0;
+
+   WARN_ONCE(!skb, "sk_buff is NULL\n");
+   err = __tcp_retransmit_skb(sk, skb, segs);
 
if (err == 0) {
 #if FASTRETRANS_DEBUG > 0
@@ -2935,6 +2938,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff 
*skb, int segs)
if (!tp->retrans_stamp)
tp->retrans_stamp = tcp_skb_timestamp(skb);
 
+   } else {
+   return err;
}
 
if (tp->undo_retrans < 0)
-- 
2.7.4

Re: [PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Andrew Lunn

> >IIRC, the phy irq isn't necessary for WOL. The phy interrupt pin isn't
> >necessarily taken as "interrupt"
> Please correct me if I am wrong. In this case how the system will wake up
> from the SC7.There has to be wake capable irq/gpio pin to do this operation.
> 
Hi Bhadram

I've seen implementations where the line from the PHY is connected to
the power supply. It simply turns the power on. No interrupt needed.

Andrew

Re: [PATCH net 1/3] net: sched: ife: signal not finding metaid

2018-04-19 Thread Jamal Hadi Salim


On 19/04/18 01:37 AM, yotam gigi wrote:

On Thu, Apr 19, 2018 at 12:35 AM, Alexander Aring  wrote:

We need to record stats for received metadata that we don't know how
to process. Have find_decode_metaid() return -ENOENT to capture this.


Agree.



Signed-off-by: Alexander Aring 


Reviewed-by: Yotam Gigi 


Acked-by: Jamal Hadi Salim

Re: [PATCH] net: phy: TLK10X initial driver submission

2018-04-19 Thread Andrew Lunn

On Thu, Apr 19, 2018 at 10:28:16AM +0200, Måns Andersson wrote:
> From: Mans Andersson 
> 
> Add support for the TI TLK105 and TLK106 10/100Mbit ethernet phys.
> 
> In addition the TLK10X needs to be removed from DP83848 driver as the
> power back off support is added here for this device.
> 
> Datasheet:
> http://www.ti.com/lit/gpn/tlk106
> ---
>  .../devicetree/bindings/net/ti,tlk10x.txt  |  27 +++
>  drivers/net/phy/Kconfig|   5 +
>  drivers/net/phy/Makefile   |   1 +
>  drivers/net/phy/dp83848.c  |   3 -
>  drivers/net/phy/tlk10x.c   | 209 
> +
>  5 files changed, 242 insertions(+), 3 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/net/ti,tlk10x.txt
>  create mode 100644 drivers/net/phy/tlk10x.c
> 
> diff --git a/Documentation/devicetree/bindings/net/ti,tlk10x.txt 
> b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> new file mode 100644
> index 000..371d0d7
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> @@ -0,0 +1,27 @@
> +* Texas Instruments - TLK105 / TLK106 ethernet PHYs
> +
> +Required properties:
> + - reg - The ID number for the phy, usually a small integer
> +
> +Optional properties:
> + - ti,power-back-off - Power Back Off Level
> + Please refer to data sheet chapter 8.6 and TI Application
> + Note SLLA3228
> + 0 - Normal Operation
> + 1 - Level 1 (up to 140m cable between TLK link partners)
> + 2 - Level 2 (up to 100m cable between TLK link partners)
> + 3 - Level 3 (up to 80m cable between TLK link partners)

Hi Måns

Device tree is all about board properties. In most cases, power back
off is not a board property, since it depends on the cable length
and the peer board. If however, your board has two PHYs back to back,
say to connect to an Ethernet switch, that would be a valid board
property.

How are you using this?

I know of others who would like such a configuration. Marvell PHYs can
do something similar. I've always suggested adding a PHY tunable. Pass
the cable length in meters and let the PHY driver pick the nearest it
can do, rounding up. The Marvell PHYs also support measuring the cable
length as part of the cable diagnostics. So it would be good to
reserve a configuration value to mean 'auto' - measure the cable and
then pick the best power back off. Quickly scanning the data sheet, I
see that this PHY also has the ability to measure the cable length.

> +static int tlk10x_read(struct phy_device *phydev, int reg)
> +{
> + if (reg & ~0x1f) {
> + /* Extended register */
> + phy_write(phydev, TLK10X_REGCR, 0x001F);
> + phy_write(phydev, TLK10X_ADDAR, reg);
> + phy_write(phydev, TLK10X_REGCR, 0x401F);
> + reg = TLK10X_ADDAR;
> + }
> +
> + return phy_read(phydev, reg);
> +}
> +
> +static int tlk10x_write(struct phy_device *phydev, int reg, int val)
> +{
> + if (reg & ~0x1f) {
> + /* Extended register */
> + phy_write(phydev, TLK10X_REGCR, 0x001F);
> + phy_write(phydev, TLK10X_ADDAR, reg);
> + phy_write(phydev, TLK10X_REGCR, 0x401F);
> + reg = TLK10X_ADDAR;
> + }
> +
> + return phy_write(phydev, reg, val);
> +}

This looks to be phy_read_mmd() and phy_write_mmd(). If so, please use
them, they get the locking correct.


> +#ifdef CONFIG_OF_MDIO
> +static int tlk10x_of_init(struct phy_device *phydev)
> +{
> + struct tlk10x_private *tlk10x = phydev->priv;
> + struct device *dev = &phydev->mdio.dev;
> + struct device_node *of_node = dev->of_node;
> + int ret;
> +
> + if (!of_node)
> + return 0;
> +
> + ret = of_property_read_u32(of_node, "ti,power-back-off",
> +&tlk10x->pwrbo_level);
> + if (ret) {
> + dev_err(dev, "missing ti,power-back-off property");
> + tlk10x->pwrbo_level = 0;
> + }

If we decide to accept this, you should do range checking, and return
-EINVAL if the value is out of range.

> +static int tlk10x_config_init(struct phy_device *phydev)
> +{
> + int ret, reg;
> + struct tlk10x_private *tlk10x;
> +
> + ret = genphy_config_init(phydev);
> + if (ret < 0)
> + return ret;
> +
> + if (!phydev->priv) {
> + tlk10x = devm_kzalloc(&phydev->mdio.dev, sizeof(*tlk10x),
> +   GFP_KERNEL);
> + if (!tlk10x)
> + return -ENOMEM;
> +
> + phydev->priv = tlk10x;
> + ret = tlk10x_of_init(phydev);
> + if (ret)
> + return ret;
> + } else {
> + tlk10x = (struct tlk10x_private *)phydev->priv;
> + }

This allocation should be done in .probe

> +
> + // Power back off
> + if (tlk10x->pwrbo_level < 0 || tlk10

Re: [PATCH net 2/3] net: sched: ife: handle malformed tlv length

2018-04-19 Thread Jamal Hadi Salim


On 19/04/18 01:37 AM, yotam gigi wrote:

On Thu, Apr 19, 2018 at 12:35 AM, Alexander Aring  wrote:

There is currently no handling to check on an invalid tlv length. This
patch adds such handling to avoid killing the kernel with a malformed
ife packet.


That's very important. Thanks for that!



Signed-off-by: Alexander Aring 


Acked-by: Jamal Hadi Salim 

cheers,
jamal

Re: [PATCH net 3/3] net: sched: ife: check on metadata length

2018-04-19 Thread Jamal Hadi Salim


On 19/04/18 01:37 AM, yotam gigi wrote:

On Thu, Apr 19, 2018 at 12:35 AM, Alexander Aring  wrote:

This patch checks if sk buffer is available to dereference ife header. If
not then NULL will be returned to signal a malformed ife packet. This
avoids crashing the kernel from outside.

Signed-off-by: Alexander Aring 


Reviewed-by: Yotam Gigi 



Acked-by: Jamal Hadi Salim 

cheers,
jamal

Re: [PATCH] net: phy: marvell: clear wol event before setting it

2018-04-19 Thread Andrew Lunn

On Thu, Apr 19, 2018 at 04:02:32PM +0800, Jisheng Zhang wrote:
> From: Jingju Hou 
> 
> If WOL event happened once, the LED[2] interrupt pin will not be
> cleared unless reading the CSISR register. So clear the WOL event
> before enabling it.
> 
> Signed-off-by: Jingju Hou 
> Signed-off-by: Jisheng Zhang 
> ---
>  drivers/net/phy/marvell.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
> index c22e8e383247..b6abe1cbc84b 100644
> --- a/drivers/net/phy/marvell.c
> +++ b/drivers/net/phy/marvell.c
> @@ -115,6 +115,9 @@
>  /* WOL Event Interrupt Enable */
>  #define MII_88E1318S_PHY_CSIER_WOL_EIE   BIT(7)
>  
> +/* Copper Specific Interrupt Status Register */
> +#define MII_88E1318S_PHY_CSISR   0x13
> +
>  /* LED Timer Control Register */
>  #define MII_88E1318S_PHY_LED_TCR 0x12
>  #define MII_88E1318S_PHY_LED_TCR_FORCE_INT   BIT(15)
> @@ -1393,6 +1396,12 @@ static int m88e1318_set_wol(struct phy_device *phydev,
>   if (err < 0)
>   goto error;
>  
> + /* If WOL event happened once, the LED[2] interrupt pin
> +  * will not be cleared unless reading the CSISR register.
> +  * So clear the WOL event first before enabling it.
> +  */
> + phy_read(phydev, MII_88E1318S_PHY_CSISR);
> +

Hi Jisheng

The problem with this is, you could be clearing a real interrupt, link
down/up etc. If interrupts are in use, i think the normal interrupt
handling will clear the WOL interrupt? So can you make this read
conditional on !phy_interrupt_is_valid()?

Andrew

Re: [PATCH v4 1/9] net-next: phy: new Asix Electronics PHY driver

2018-04-19 Thread Andrew Lunn

On Thu, Apr 19, 2018 at 02:05:18PM +1200, Michael Schmitz wrote:
> The Asix Electronics PHY found on the X-Surf 100 Amiga Zorro network
> card by Individual Computers is buggy, and needs the reset bit toggled
> as workaround to make a PHY soft reset succeed.
> 
> Add workaround driver just for this special case.
> 
> Suggested in xsurf100 patch series review by Andrew Lunn 
> 
> Signed-off-by: Michael Schmitz 

Reviewed-by: Andrew Lunn 

Andrew

Re: [RFC PATCH ghak32 V2 04/13] audit: add containerid filtering

2018-04-19 Thread Richard Guy Briggs

On 2018-04-18 20:24, Paul Moore wrote:
> On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs  wrote:
> > Implement container ID filtering using the AUDIT_CONTAINERID field name
> > to send an 8-character string representing a u64 since the value field
> > is only u32.
> >
> > Sending it as two u32 was considered, but gathering and comparing two
> > fields was more complex.
> 
> My only worry here is that you aren't really sending a string in the
> ASCII sense, you are sending an 8 byte buffer (that better be NUL
> terminated) that happens to be an unsigned 64-bit integer.  To be
> clear, I'm okay with that (it's protected by AUDIT_CONTAINERID), and
> the code is okay with that, I just want us to pause for a minute and
> make sure that is an okay thing to do long term.

I already went through that process and warned of it 7 weeks ago.  As
already noted, that was preferable to two separate u32 fields that
depend on each other making comparisons more complicated.  Using two
separate fields to configure the rule could be gated for validity, then
the result stored in a special rule field, but I wasn't keen about that
approach.

> > The feature indicator is AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER.
> >
> > This requires support from userspace to be useful.
> > See: https://github.com/linux-audit/audit-userspace/issues/40
> > Signed-off-by: Richard Guy Briggs 
> > ---
> >  include/linux/audit.h  |  1 +
> >  include/uapi/linux/audit.h |  5 -
> >  kernel/audit.h |  1 +
> >  kernel/auditfilter.c   | 47 
> > ++
> >  kernel/auditsc.c   |  3 +++
> >  5 files changed, 56 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/audit.h b/include/linux/audit.h
> > index 3acbe9d..f10ca1b 100644
> > --- a/include/linux/audit.h
> > +++ b/include/linux/audit.h
> > @@ -76,6 +76,7 @@ struct audit_field {
> > u32 type;
> > union {
> > u32 val;
> > +   u64 val64;
> > kuid_t  uid;
> > kgid_t  gid;
> > struct {
> > diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> > index e83ccbd..8443a8f 100644
> > --- a/include/uapi/linux/audit.h
> > +++ b/include/uapi/linux/audit.h
> > @@ -262,6 +262,7 @@
> >  #define AUDIT_LOGINUID_SET 24
> >  #define AUDIT_SESSIONID25  /* Session ID */
> >  #define AUDIT_FSTYPE   26  /* FileSystem Type */
> > +#define AUDIT_CONTAINERID  27  /* Container ID */
> >
> > /* These are ONLY useful when checking
> >  * at syscall exit time (AUDIT_AT_EXIT). */
> > @@ -342,6 +343,7 @@ enum {
> >  #define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER  0x0010
> >  #define AUDIT_FEATURE_BITMAP_LOST_RESET0x0020
> >  #define AUDIT_FEATURE_BITMAP_FILTER_FS 0x0040
> > +#define AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER0x0080
> >
> >  #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
> >   AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
> > @@ -349,7 +351,8 @@ enum {
> >   AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
> >   AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \
> >   AUDIT_FEATURE_BITMAP_LOST_RESET | \
> > - AUDIT_FEATURE_BITMAP_FILTER_FS)
> > + AUDIT_FEATURE_BITMAP_FILTER_FS | \
> > + AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER)
> >
> >  /* deprecated: AUDIT_VERSION_* */
> >  #define AUDIT_VERSION_LATEST   AUDIT_FEATURE_BITMAP_ALL
> > diff --git a/kernel/audit.h b/kernel/audit.h
> > index 214e149..aaa651a 100644
> > --- a/kernel/audit.h
> > +++ b/kernel/audit.h
> > @@ -234,6 +234,7 @@ static inline int audit_hash_ino(u32 ino)
> >
> >  extern int audit_match_class(int class, unsigned syscall);
> >  extern int audit_comparator(const u32 left, const u32 op, const u32 right);
> > +extern int audit_comparator64(const u64 left, const u32 op, const u64 
> > right);
> >  extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
> >  extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
> >  extern int parent_len(const char *path);
> > diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
> > index d7a807e..c4c8746 100644
> > --- a/kernel/auditfilter.c
> > +++ b/kernel/auditfilter.c
> > @@ -410,6 +410,7 @@ static int audit_field_valid(struct audit_entry *entry, 
> > struct audit_field *f)
> > /* FALL THROUGH */
> > case AUDIT_ARCH:
> > case AUDIT_FSTYPE:
> > +   case AUDIT_CONTAINERID:
> > if (f->op != Audit_not_equal && f->op != Audit_equal)
> > return -EINVAL;
> > brea

Re: [PATCH v2 net 1/3] virtio_net: split out ctrl buffer

2018-04-19 Thread Jason Wang




On 2018年04月19日 13:30, Michael S. Tsirkin wrote:

When sending control commands, virtio net sets up several buffers for
DMA. The buffers are all part of the net device which means it's
actually allocated by kvmalloc so it's in theory (on extreme memory
pressure) possible to get a vmalloc'ed buffer which on some platforms
means we can't DMA there.

Fix up by moving the DMA buffers into a separate structure.

Reported-by: Mikulas Patocka 
Suggested-by: Eric Dumazet 
Signed-off-by: Michael S. Tsirkin 
---

Changes from v1:
build fix

  drivers/net/virtio_net.c | 68 +++-
  1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7b187ec..3d0eff53 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -147,6 +147,17 @@ struct receive_queue {
struct xdp_rxq_info xdp_rxq;
  };
  
+/* Control VQ buffers: protected by the rtnl lock */

+struct control_buf {
+   struct virtio_net_ctrl_hdr hdr;
+   virtio_net_ctrl_ack status;
+   struct virtio_net_ctrl_mq mq;
+   u8 promisc;
+   u8 allmulti;
+   u16 vid;
+   u64 offloads;
+};
+
  struct virtnet_info {
struct virtio_device *vdev;
struct virtqueue *cvq;
@@ -192,14 +203,7 @@ struct virtnet_info {
struct hlist_node node;
struct hlist_node node_dead;
  
-	/* Control VQ buffers: protected by the rtnl lock */

-   struct virtio_net_ctrl_hdr ctrl_hdr;
-   virtio_net_ctrl_ack ctrl_status;
-   struct virtio_net_ctrl_mq ctrl_mq;
-   u8 ctrl_promisc;
-   u8 ctrl_allmulti;
-   u16 ctrl_vid;
-   u64 ctrl_offloads;
+   struct control_buf *ctrl;
  
  	/* Ethtool settings */

u8 duplex;
@@ -1454,25 +1458,25 @@ static bool virtnet_send_command(struct virtnet_info 
*vi, u8 class, u8 cmd,
/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
  
-	vi->ctrl_status = ~0;

-   vi->ctrl_hdr.class = class;
-   vi->ctrl_hdr.cmd = cmd;
+   vi->ctrl->status = ~0;
+   vi->ctrl->hdr.class = class;
+   vi->ctrl->hdr.cmd = cmd;
/* Add header */
-   sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
+   sg_init_one(&hdr, &vi->ctrl->hdr, sizeof(vi->ctrl->hdr));
sgs[out_num++] = &hdr;
  
  	if (out)

sgs[out_num++] = out;
  
  	/* Add return status. */

-   sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
+   sg_init_one(&stat, &vi->ctrl->status, sizeof(vi->ctrl->status));
sgs[out_num] = &stat;
  
  	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));

virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
  
  	if (unlikely(!virtqueue_kick(vi->cvq)))

-   return vi->ctrl_status == VIRTIO_NET_OK;
+   return vi->ctrl->status == VIRTIO_NET_OK;
  
  	/* Spin for a response, the kick causes an ioport write, trapping

 * into the hypervisor, so the request should be handled immediately.
@@ -1481,7 +1485,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, 
u8 class, u8 cmd,
   !virtqueue_is_broken(vi->cvq))
cpu_relax();
  
-	return vi->ctrl_status == VIRTIO_NET_OK;

+   return vi->ctrl->status == VIRTIO_NET_OK;
  }
  
  static int virtnet_set_mac_address(struct net_device *dev, void *p)

@@ -1593,8 +1597,8 @@ static int _virtnet_set_queues(struct virtnet_info *vi, 
u16 queue_pairs)
if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
return 0;
  
-	vi->ctrl_mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);

-   sg_init_one(&sg, &vi->ctrl_mq, sizeof(vi->ctrl_mq));
+   vi->ctrl->mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
+   sg_init_one(&sg, &vi->ctrl->mq, sizeof(vi->ctrl->mq));
  
  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,

  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
@@ -1653,22 +1657,22 @@ static void virtnet_set_rx_mode(struct net_device *dev)
if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
return;
  
-	vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);

-   vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+   vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
+   vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
  
-	sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));

+   sg_init_one(sg, &vi->ctrl->promisc, sizeof(vi->ctrl->promisc));
  
  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,

  VIRTIO_NET_CTRL_RX_PROMISC, sg))
dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
-vi->ctrl_promisc ? "en" : "dis");
+vi->ctrl->promisc ? "en" : "dis");
  
-	sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));

+   sg_init_one(sg, &vi->ctrl->

Re: [PATCH v2 net 2/3] virtio_net: fix adding vids on big-endian

2018-04-19 Thread Jason Wang




On 2018年04月19日 13:30, Michael S. Tsirkin wrote:

Programming vids (adding or removing them) still passes
guest-endian values in the DMA buffer. That's wrong
if guest is big-endian and when virtio 1 is enabled.

Note: this is on top of a previous patch:
virtio_net: split out ctrl buffer

Fixes: 9465a7a6f ("virtio_net: enable v1.0 support")
Signed-off-by: Michael S. Tsirkin 
---
  drivers/net/virtio_net.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3d0eff53..f84fe04 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -154,7 +154,7 @@ struct control_buf {
struct virtio_net_ctrl_mq mq;
u8 promisc;
u8 allmulti;
-   u16 vid;
+   __virtio16 vid;
u64 offloads;
  };
  
@@ -1718,7 +1718,7 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev,

struct virtnet_info *vi = netdev_priv(dev);
struct scatterlist sg;
  
-	vi->ctrl->vid = vid;

+   vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
  
  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,

@@ -1733,7 +1733,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device 
*dev,
struct virtnet_info *vi = netdev_priv(dev);
struct scatterlist sg;
  
-	vi->ctrl->vid = vid;

+   vi->ctrl->vid = cpu_to_virtio16(vi->vdev, vid);
sg_init_one(&sg, &vi->ctrl->vid, sizeof(vi->ctrl->vid));
  
  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,


Acked-by: Jason Wang

Re: [PATCH bpf-next v3 5/8] bpf: add documentation for eBPF helpers (33-41)

2018-04-19 Thread Daniel Borkmann

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions, all
> written by Daniel:
> 
> - bpf_get_hash_recalc()
> - bpf_skb_change_tail()
> - bpf_skb_pull_data()
> - bpf_csum_update()
> - bpf_set_hash_invalid()
> - bpf_get_numa_node_id()
> - bpf_set_hash()
> - bpf_skb_adjust_room()
> - bpf_xdp_adjust_meta()
> 
> Cc: Daniel Borkmann 
> Signed-off-by: Quentin Monnet 
> ---
>  include/uapi/linux/bpf.h | 155 
> +++
>  1 file changed, 155 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index d748f65a8f58..3a40f5debac2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -965,9 +965,164 @@ union bpf_attr {
>   *   Return
>   *   0 on success, or a negative error in case of failure.
>   *
> + * u32 bpf_get_hash_recalc(struct sk_buff *skb)
> + *   Description
> + *   Retrieve the hash of the packet, *skb*\ **->hash**. If it is
> + *   not set, in particular if the hash was cleared due to mangling,
> + *   recompute this hash. Later accesses to the hash can be done
> + *   directly with *skb*\ **->hash**.
> + *
> + *   Calling **bpf_set_hash_invalid**\ (), changing a packet
> + *   prototype with **bpf_skb_change_proto**\ (), or calling
> + *   **bpf_skb_store_bytes**\ () with the
> + *   **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
> + *   the hash and to trigger a new computation for the next call to
> + *   **bpf_get_hash_recalc**\ ().
> + *   Return
> + *   The 32-bit hash.
> + *
>   * u64 bpf_get_current_task(void)
>   *   Return
>   *   A pointer to the current task struct.
> + *
> + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
> + *   Description
> + *   Resize (trim or grow) the packet associated to *skb* to the
> + *   new *len*. The *flags* are reserved for future usage, and must
> + *   be left at zero.
> + *
> + *   The basic idea is that the helper performs the needed work to
> + *   change the size of the packet, then the eBPF program rewrites
> + *   the rest via helpers like **bpf_skb_store_bytes**\ (),
> + *   **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
> + *   and others. This helper is a slow path utility intended for
> + *   replies with control messages. And because it is targeted for
> + *   slow path, the helper itself can afford to be slow: it
> + *   implicitly linearizes, unclones and drops offloads from the
> + *   *skb*.
> + *
> + *   A call to this helper is susceptible to change data from the
> + *   packet. Therefore, at load time, all checks on pointers
> + *   previously done by the verifier are invalidated and must be
> + *   performed again.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
> + *   Description
> + *   Pull in non-linear data in case the *skb* is non-linear and not
> + *   all of *len* are part of the linear section. Make *len* bytes
> + *   from *skb* readable and writable. If a zero value is passed for
> + *   *len*, then the whole length of the *skb* is pulled.
> + *
> + *   This helper is only needed for reading and writing with direct
> + *   packet access.
> + *
> + *   For direct packet access, when testing that offsets to access
> + *   are within packet boundaries (test on *skb*\ **->data_end**)
> + *   fails, programs just bail out, or, in the direct read case, use

I would add here why it can fail, meaning either due to invalid offsets
or due to the requested data being in non-linear parts of the skb, in which
case either bpf_skb_load_bytes() can be used as you mentioned or the data
can be pulled in via bpf_skb_pull_data().

> + *   **bpf_skb_load_bytes()** as an alternative to overcome this
> + *   limitation. If such data sits in non-linear parts, it is
> + *   possible to pull them in once with the new helper, retest and
> + *   eventually access them.

You do this here, but maybe slightly rearranging this one paragraph a bit as
to why one would use either of the helpers would help reading flow a bit.

> + *   At the same time, this also makes sure the skb is uncloned,
> + *   which is a necessary condition for

Re: [PATCH v2 net 3/3] virtio_net: sparse annotation fix

2018-04-19 Thread Jason Wang




On 2018年04月19日 13:30, Michael S. Tsirkin wrote:

offloads is a buffer in virtio format, should use
the __virtio64 tag.

Signed-off-by: Michael S. Tsirkin 
---
  drivers/net/virtio_net.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f84fe04..c5b11f2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -155,7 +155,7 @@ struct control_buf {
u8 promisc;
u8 allmulti;
__virtio16 vid;
-   u64 offloads;
+   __virtio64 offloads;
  };
  
  struct virtnet_info {


Acked-by: Jason Wang

[PATCH net-next 2/4] geneve: cleanup hard coded value for Ethernet header length

2018-04-19 Thread Alexey Kodanev

Use ETH_HLEN instead and introduce two new macros, GENEVE_IPV4_HLEN
and GENEVE_IPV6_HLEN, that include the Ethernet header length, the
corresponding IP header length and GENEVE_BASE_HLEN.

Signed-off-by: Alexey Kodanev 
---
 drivers/net/geneve.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 45acdc9..b650f84 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -36,6 +36,8 @@
 
 #define GENEVE_VER 0
 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
+#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
+#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
 
 /* per-network namespace private data for this module */
 struct geneve_net {
@@ -826,8 +828,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct 
net_device *dev,
return PTR_ERR(rt);
 
if (skb_dst(skb)) {
-   int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
- GENEVE_BASE_HLEN - info->options_len - 14;
+   int mtu = dst_mtu(&rt->dst) - GENEVE_IPV4_HLEN -
+ info->options_len;
 
skb_dst_update_pmtu(skb, mtu);
}
@@ -872,8 +874,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct 
net_device *dev,
return PTR_ERR(dst);
 
if (skb_dst(skb)) {
-   int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
- GENEVE_BASE_HLEN - info->options_len - 14;
+   int mtu = dst_mtu(dst) - GENEVE_IPV6_HLEN - info->options_len;
 
skb_dst_update_pmtu(skb, mtu);
}
-- 
1.8.3.1

Re: [RFC PATCH ghak32 V2 07/13] audit: add container aux record to watch/tree/mark

2018-04-19 Thread Richard Guy Briggs

On 2018-04-18 20:42, Paul Moore wrote:
> On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs  wrote:
> > Add container ID auxiliary record to mark, watch and tree rule
> > configuration standalone records.
> >
> > Signed-off-by: Richard Guy Briggs 
> > ---
> >  kernel/audit_fsnotify.c |  5 -
> >  kernel/audit_tree.c |  5 -
> >  kernel/audit_watch.c| 33 +++--
> >  3 files changed, 27 insertions(+), 16 deletions(-)
> >
> > diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
> > index 52f368b..18c110d 100644
> > --- a/kernel/audit_fsnotify.c
> > +++ b/kernel/audit_fsnotify.c
> > @@ -124,10 +124,11 @@ static void audit_mark_log_rule_change(struct 
> > audit_fsnotify_mark *audit_mark, c
> >  {
> > struct audit_buffer *ab;
> > struct audit_krule *rule = audit_mark->rule;
> > +   struct audit_context *context = audit_alloc_local();
> >
> > if (!audit_enabled)
> > return;
> 
> Move the audit_alloc_local() after the audit_enabled check.

Already fixed in V3 as previously warned, by making all
AUDIT_CONFIG_CHANGE records SYSCALL auxiliary records.

> > -   ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> > +   ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> > if (unlikely(!ab))
> > return;
> > audit_log_format(ab, "auid=%u ses=%u op=%s",
> > @@ -138,6 +139,8 @@ static void audit_mark_log_rule_change(struct 
> > audit_fsnotify_mark *audit_mark, c
> > audit_log_key(ab, rule->filterkey);
> > audit_log_format(ab, " list=%d res=1", rule->listnr);
> > audit_log_end(ab);
> > +   audit_log_container_info(context, "config", 
> > audit_get_containerid(current));
> > +   audit_free_context(context);
> >  }
> >
> >  void audit_remove_mark(struct audit_fsnotify_mark *audit_mark)
> > diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> > index 67e6956..7c085be 100644
> > --- a/kernel/audit_tree.c
> > +++ b/kernel/audit_tree.c
> > @@ -496,8 +496,9 @@ static int tag_chunk(struct inode *inode, struct 
> > audit_tree *tree)
> >  static void audit_tree_log_remove_rule(struct audit_krule *rule)
> >  {
> > struct audit_buffer *ab;
> > +   struct audit_context *context = audit_alloc_local();
> 
> Sort of independent of the audit container ID work, but shouldn't we
> have an audit_enabled check here?

Same.

> > -   ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> > +   ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> > if (unlikely(!ab))
> > return;
> > audit_log_format(ab, "op=remove_rule");
> > @@ -506,6 +507,8 @@ static void audit_tree_log_remove_rule(struct 
> > audit_krule *rule)
> > audit_log_key(ab, rule->filterkey);
> > audit_log_format(ab, " list=%d res=1", rule->listnr);
> > audit_log_end(ab);
> > +   audit_log_container_info(context, "config", 
> > audit_get_containerid(current));
> > +   audit_free_context(context);
> >  }
> >
> >  static void kill_rules(struct audit_tree *tree)
> > diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
> > index 9eb8b35..60d75a2 100644
> > --- a/kernel/audit_watch.c
> > +++ b/kernel/audit_watch.c
> > @@ -238,20 +238,25 @@ static struct audit_watch *audit_dupe_watch(struct 
> > audit_watch *old)
> >
> >  static void audit_watch_log_rule_change(struct audit_krule *r, struct 
> > audit_watch *w, char *op)
> >  {
> > -   if (audit_enabled) {
> > -   struct audit_buffer *ab;
> > -   ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> > -   if (unlikely(!ab))
> > -   return;
> > -   audit_log_format(ab, "auid=%u ses=%u op=%s",
> > -from_kuid(&init_user_ns, 
> > audit_get_loginuid(current)),
> > -audit_get_sessionid(current), op);
> > -   audit_log_format(ab, " path=");
> > -   audit_log_untrustedstring(ab, w->path);
> > -   audit_log_key(ab, r->filterkey);
> > -   audit_log_format(ab, " list=%d res=1", r->listnr);
> > -   audit_log_end(ab);
> > -   }
> > +   struct audit_buffer *ab;
> > +   struct audit_context *context = audit_alloc_local();
> > +
> > +   if (!audit_enabled)
> > +   return;
> 
> Same as above, do the allocation after the audit_enabled check.

Same.

> > +   ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
> > +   if (unlikely(!ab))
> > +   return;
> > +   audit_log_format(ab, "auid=%u ses=%u op=%s",
> > +from_kuid(&init_user_ns, 
> > audit_get_loginuid(current)),
> > +audit_get_sessionid(current), op);
> > +   audit_log_format(ab, " path=");
> > +   audit_log_untrustedstring(ab, w->path);
> > +   audit_log_key(ab, r->filterkey);
> > +   a

Re: [RFC PATCH ghak32 V2 09/13] audit: add containerid support for config/feature/user records

2018-04-19 Thread Richard Guy Briggs

On 2018-04-18 21:27, Paul Moore wrote:
> On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs  wrote:
> > Add container ID auxiliary records to configuration change, feature set 
> > change
> > and user generated standalone records.
> >
> > Signed-off-by: Richard Guy Briggs 
> > ---
> >  kernel/audit.c   | 50 
> > --
> >  kernel/auditfilter.c |  5 -
> >  2 files changed, 44 insertions(+), 11 deletions(-)
> >
> > diff --git a/kernel/audit.c b/kernel/audit.c
> > index b238be5..08662b4 100644
> > --- a/kernel/audit.c
> > +++ b/kernel/audit.c
> > @@ -400,8 +400,9 @@ static int audit_log_config_change(char *function_name, 
> > u32 new, u32 old,
> >  {
> > struct audit_buffer *ab;
> > int rc = 0;
> > +   struct audit_context *context = audit_alloc_local();
> 
> We should be able to use current->audit_context here right?  If we
> can't for every caller, perhaps we pass an audit_context as an
> argument and only allocate a local context when the passed
> audit_context is NULL.
> 
> Also, if you're not comfortable always using current, just pass the
> audit_context as you do with audit_log_common_recv_msg().

As mentioned in the tree/watch/mark patch, this is all obsoleted by
making the AUDIT_CONFIG_CHANGE record a SYSCALL auxiliary record.
This review would have been more helpful a month and a half ago.

> > -   ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> > +   ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
> > if (unlikely(!ab))
> > return rc;
> > audit_log_format(ab, "%s=%u old=%u", function_name, new, old);
> > @@ -411,6 +412,8 @@ static int audit_log_config_change(char *function_name, 
> > u32 new, u32 old,
> > allow_changes = 0; /* Something weird, deny request */
> > audit_log_format(ab, " res=%d", allow_changes);
> > audit_log_end(ab);
> > +   audit_log_container_info(context, "config", 
> > audit_get_containerid(current));
> > +   audit_free_context(context);
> > return rc;
> >  }
> >
> > @@ -1058,7 +1061,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 
> > msg_type)
> > return err;
> >  }
> >
> > -static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 
> > msg_type)
> > +static void audit_log_common_recv_msg(struct audit_context *context,
> > + struct audit_buffer **ab, u16 
> > msg_type)
> >  {
> > uid_t uid = from_kuid(&init_user_ns, current_uid());
> > pid_t pid = task_tgid_nr(current);
> > @@ -1068,7 +1072,7 @@ static void audit_log_common_recv_msg(struct 
> > audit_buffer **ab, u16 msg_type)
> > return;
> > }
> >
> > -   *ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
> > +   *ab = audit_log_start(context, GFP_KERNEL, msg_type);
> > if (unlikely(!*ab))
> > return;
> > audit_log_format(*ab, "pid=%d uid=%u", pid, uid);
> > @@ -1097,11 +1101,12 @@ static void audit_log_feature_change(int which, u32 
> > old_feature, u32 new_feature
> >  u32 old_lock, u32 new_lock, int res)
> >  {
> > struct audit_buffer *ab;
> > +   struct audit_context *context = audit_alloc_local();
> 
> So I know based on the other patch we are currently discussing that we
> can use current here ...
> 
> > if (audit_enabled == AUDIT_OFF)
> > return;
> >
> > -   ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
> > +   ab = audit_log_start(context, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
> > if (!ab)
> > return;
> > audit_log_task_info(ab, current);
> > @@ -1109,6 +1114,8 @@ static void audit_log_feature_change(int which, u32 
> > old_feature, u32 new_feature
> >  audit_feature_names[which], !!old_feature, 
> > !!new_feature,
> >  !!old_lock, !!new_lock, res);
> > audit_log_end(ab);
> > +   audit_log_container_info(context, "feature", 
> > audit_get_containerid(current));
> > +   audit_free_context(context);
> >  }
> >
> >  static int audit_set_feature(struct sk_buff *skb)
> > @@ -1337,13 +1344,15 @@ static int audit_receive_msg(struct sk_buff *skb, 
> > struct nlmsghdr *nlh)
> >
> > err = audit_filter(msg_type, AUDIT_FILTER_USER);
> > if (err == 1) { /* match or error */
> > +   struct audit_context *context = audit_alloc_local();
> 
> I'm pretty sure we can use current here.
> 
> > err = 0;
> > if (msg_type == AUDIT_USER_TTY) {
> > err = tty_audit_push();
> > if (err)
> > break;
> > }
> > -   audit_log_common_recv_msg(&ab, msg_type);
> > +   audit_

[PATCH 12/39] net: move seq_file_single_net to <linux/seq_file_net.h>

2018-04-19 Thread Christoph Hellwig

This helper deals with single_{open,release}_net internals and thus
belongs here.

Signed-off-by: Christoph Hellwig 
---
 include/linux/seq_file_net.h | 13 +
 include/net/ip_vs.h  | 12 
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h
index 43ccd84127b6..ed20faa99e05 100644
--- a/include/linux/seq_file_net.h
+++ b/include/linux/seq_file_net.h
@@ -28,4 +28,17 @@ static inline struct net *seq_file_net(struct seq_file *seq)
 #endif
 }
 
+/*
+ * This one is needed for single_open_net since net is stored directly in
+ * private not as a struct i.e. seq_file_net can't be used.
+ */
+static inline struct net *seq_file_single_net(struct seq_file *seq)
+{
+#ifdef CONFIG_NET_NS
+   return (struct net *)seq->private;
+#else
+   return &init_net;
+#endif
+}
+
 #endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index eb0bec043c96..aea7a124e66b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -41,18 +41,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net)
return net->ipvs;
 }
 
-/* This one needed for single_open_net since net is stored directly in
- * private not as a struct i.e. seq_file_net can't be used.
- */
-static inline struct net *seq_file_single_net(struct seq_file *seq)
-{
-#ifdef CONFIG_NET_NS
-   return (struct net *)seq->private;
-#else
-   return &init_net;
-#endif
-}
-
 /* Connections' size value needed by ip_vs_ctl.c */
 extern int ip_vs_conn_tab_size;
 
-- 
2.17.0

[PATCH 38/39] ide: replace ->proc_fops with ->proc_show

2018-04-19 Thread Christoph Hellwig

Just set up the show callback in the ide_proc_entry_t tables, and use
proc_create_single_data to create the file without additional
boilerplate code.

Signed-off-by: Christoph Hellwig 
---
 drivers/ide/ide-cd.c  |  15 +---
 drivers/ide/ide-disk_proc.c   |  62 ++--
 drivers/ide/ide-floppy_proc.c |  17 +
 drivers/ide/ide-proc.c| 136 +-
 drivers/ide/ide-tape.c|  17 +
 include/linux/ide.h   |   6 +-
 6 files changed, 31 insertions(+), 222 deletions(-)

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 5a8e8e3c22cd..b52a7bdace52 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1426,21 +1426,8 @@ static int idecd_capacity_proc_show(struct seq_file *m, 
void *v)
return 0;
 }
 
-static int idecd_capacity_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, idecd_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idecd_capacity_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = idecd_capacity_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static ide_proc_entry_t idecd_proc[] = {
-   { "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops },
+   { "capacity", S_IFREG|S_IRUGO, idecd_capacity_proc_show },
{}
 };
 
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
index 82a36ced4e96..95d239b2f646 100644
--- a/drivers/ide/ide-disk_proc.c
+++ b/drivers/ide/ide-disk_proc.c
@@ -52,19 +52,6 @@ static int idedisk_cache_proc_show(struct seq_file *m, void 
*v)
return 0;
 }
 
-static int idedisk_cache_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, idedisk_cache_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_cache_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = idedisk_cache_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static int idedisk_capacity_proc_show(struct seq_file *m, void *v)
 {
ide_drive_t*drive = (ide_drive_t *)m->private;
@@ -73,19 +60,6 @@ static int idedisk_capacity_proc_show(struct seq_file *m, 
void *v)
return 0;
 }
 
-static int idedisk_capacity_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, idedisk_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_capacity_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = idedisk_capacity_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 
sub_cmd)
 {
u8 *buf;
@@ -114,43 +88,17 @@ static int idedisk_sv_proc_show(struct seq_file *m, void 
*v)
return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES);
 }
 
-static int idedisk_sv_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, idedisk_sv_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_sv_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = idedisk_sv_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static int idedisk_st_proc_show(struct seq_file *m, void *v)
 {
return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS);
 }
 
-static int idedisk_st_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, idedisk_st_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_st_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = idedisk_st_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 ide_proc_entry_t ide_disk_proc[] = {
-   { "cache",S_IFREG|S_IRUGO, &idedisk_cache_proc_fops },
-   { "capacity", S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops  },
-   { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops  },
-   { "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops},
-   { "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops},
+   { "cache",S_IFREG|S_IRUGO, idedisk_cache_proc_show  },
+   { "capacity", S_IFREG|S_IRUGO, idedisk_capacity_proc_show   },
+   { "geometry", S_IFREG|S_IRUGO, ide_geometry_proc_show   },
+   { "smart_values", S_IFREG|S_IRUSR, idedisk_sv_proc_show },
+   { "smart_thresholds", S_IFREG|S_IRUSR, idedisk_st_proc_show },
{}
 };
 
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
index 471457ebea67..7f697

[PATCH 39/39] tty: replace ->proc_fops with ->proc_show

2018-04-19 Thread Christoph Hellwig

Just set up the show callback in the tty_operations, and use
proc_create_single_data to create the file without additional
boilerplate code.

Signed-off-by: Christoph Hellwig 
---
 arch/ia64/hp/sim/simserial.c| 15 +--
 arch/xtensa/platforms/iss/console.c | 15 +--
 drivers/char/pcmcia/synclink_cs.c   | 15 +--
 drivers/mmc/core/sdio_uart.c| 15 +--
 drivers/staging/fwserial/fwserial.c | 15 +--
 drivers/tty/amiserial.c | 15 +--
 drivers/tty/cyclades.c  | 15 +--
 drivers/tty/serial/serial_core.c| 15 +--
 drivers/tty/synclink.c  | 15 +--
 drivers/tty/synclink_gt.c   | 15 +--
 drivers/tty/synclinkmp.c| 15 +--
 drivers/usb/serial/usb-serial.c | 15 +--
 fs/proc/proc_tty.c  |  6 +++---
 include/linux/tty_driver.h  |  2 +-
 14 files changed, 16 insertions(+), 172 deletions(-)

diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index a419ccf33cde..663388a73d4e 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -435,19 +435,6 @@ static int rs_proc_show(struct seq_file *m, void *v)
return 0;
 }
 
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = rs_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static const struct tty_operations hp_ops = {
.open = rs_open,
.close = rs_close,
@@ -462,7 +449,7 @@ static const struct tty_operations hp_ops = {
.unthrottle = rs_unthrottle,
.send_xchar = rs_send_xchar,
.hangup = rs_hangup,
-   .proc_fops = &rs_proc_fops,
+   .proc_show = rs_proc_show,
 };
 
 static const struct tty_port_operations hp_port_ops = {
diff --git a/arch/xtensa/platforms/iss/console.c 
b/arch/xtensa/platforms/iss/console.c
index 92f567f9a21e..af81a62faba6 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -153,19 +153,6 @@ static int rs_proc_show(struct seq_file *m, void *v)
return 0;
 }
 
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = rs_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static const struct tty_operations serial_ops = {
.open = rs_open,
.close = rs_close,
@@ -176,7 +163,7 @@ static const struct tty_operations serial_ops = {
.chars_in_buffer = rs_chars_in_buffer,
.hangup = rs_hangup,
.wait_until_sent = rs_wait_until_sent,
-   .proc_fops = &rs_proc_fops,
+   .proc_show = rs_proc_show,
 };
 
 int __init rs_init(void)
diff --git a/drivers/char/pcmcia/synclink_cs.c 
b/drivers/char/pcmcia/synclink_cs.c
index aa502e9fb7fa..66b04194aa9f 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -2616,19 +2616,6 @@ static int mgslpc_proc_show(struct seq_file *m, void *v)
return 0;
 }
 
-static int mgslpc_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, mgslpc_proc_show, NULL);
-}
-
-static const struct file_operations mgslpc_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = mgslpc_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static int rx_alloc_buffers(MGSLPC_INFO *info)
 {
/* each buffer has header and data */
@@ -2815,7 +2802,7 @@ static const struct tty_operations mgslpc_ops = {
.tiocmget = tiocmget,
.tiocmset = tiocmset,
.get_icount = mgslpc_get_icount,
-   .proc_fops = &mgslpc_proc_fops,
+   .proc_show = mgslpc_proc_show,
 };
 
 static int __init synclink_cs_init(void)
diff --git a/drivers/mmc/core/sdio_uart.c b/drivers/mmc/core/sdio_uart.c
index d3c91f412b69..25e113001a3c 100644
--- a/drivers/mmc/core/sdio_uart.c
+++ b/drivers/mmc/core/sdio_uart.c
@@ -1008,19 +1008,6 @@ static int sdio_uart_proc_show(struct seq_file *m, void 
*v)
return 0;
 }
 
-static int sdio_uart_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, sdio_uart_proc_show, NULL);
-}
-
-static const struct file_operations sdio_uart_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = sdio_uart_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 static const struct t

[PATCH 31/39] netfilter/x_tables: switch to proc_create_seq_private

2018-04-19 Thread Christoph Hellwig

And remove proc boilerplate code.

Signed-off-by: Christoph Hellwig 
---
 net/netfilter/x_tables.c | 42 ++--
 1 file changed, 6 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 344dd01a5027..0e314f95a4a3 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1648,22 +1648,6 @@ static const struct seq_operations xt_match_seq_ops = {
.show   = xt_match_seq_show,
 };
 
-static int xt_match_open(struct inode *inode, struct file *file)
-{
-   struct nf_mttg_trav *trav;
-   trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
-   if (!trav)
-   return -ENOMEM;
-   return 0;
-}
-
-static const struct file_operations xt_match_ops = {
-   .open= xt_match_open,
-   .read= seq_read,
-   .llseek  = seq_lseek,
-   .release = seq_release_private,
-};
-
 static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
 {
return xt_mttg_seq_start(seq, pos, true);
@@ -1698,22 +1682,6 @@ static const struct seq_operations xt_target_seq_ops = {
.show   = xt_target_seq_show,
 };
 
-static int xt_target_open(struct inode *inode, struct file *file)
-{
-   struct nf_mttg_trav *trav;
-   trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
-   if (!trav)
-   return -ENOMEM;
-   return 0;
-}
-
-static const struct file_operations xt_target_ops = {
-   .open= xt_target_open,
-   .read= seq_read,
-   .llseek  = seq_lseek,
-   .release = seq_release_private,
-};
-
 #define FORMAT_TABLES  "_tables_names"
 #defineFORMAT_MATCHES  "_tables_matches"
 #define FORMAT_TARGETS "_tables_targets"
@@ -1787,8 +1755,9 @@ int xt_proto_init(struct net *net, u_int8_t af)
 
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
-   proc = proc_create_data(buf, 0440, net->proc_net, &xt_match_ops,
-   (void *)(unsigned long)af);
+   proc = proc_create_seq_private(buf, 0440, net->proc_net,
+   &xt_match_seq_ops, sizeof(struct nf_mttg_trav),
+   (void *)(unsigned long)af);
if (!proc)
goto out_remove_tables;
if (uid_valid(root_uid) && gid_valid(root_gid))
@@ -1796,8 +1765,9 @@ int xt_proto_init(struct net *net, u_int8_t af)
 
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
-   proc = proc_create_data(buf, 0440, net->proc_net, &xt_target_ops,
-   (void *)(unsigned long)af);
+   proc = proc_create_seq_private(buf, 0440, net->proc_net,
+&xt_target_seq_ops, sizeof(struct nf_mttg_trav),
+(void *)(unsigned long)af);
if (!proc)
goto out_remove_matches;
if (uid_valid(root_uid) && gid_valid(root_gid))
-- 
2.17.0

[PATCH 29/39] neigh: switch to proc_create_seq_data

2018-04-19 Thread Christoph Hellwig

And use proc private data directly instead of doing a detour
through seq->private.

Signed-off-by: Christoph Hellwig 
---
 net/core/neighbour.c | 31 ++-
 1 file changed, 6 insertions(+), 25 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7b7a14abba28..cbfb6d71b8be 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -58,7 +58,7 @@ static void neigh_update_notify(struct neighbour *neigh, u32 
nlmsg_pid);
 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
 
 #ifdef CONFIG_PROC_FS
-static const struct file_operations neigh_stat_seq_fops;
+static const struct seq_operations neigh_stat_seq_ops;
 #endif
 
 /*
@@ -1550,8 +1550,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
panic("cannot create neighbour cache statistics");
 
 #ifdef CONFIG_PROC_FS
-   if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
- &neigh_stat_seq_fops, tbl))
+   if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
+ &neigh_stat_seq_ops, tbl))
panic("cannot create neighbour proc dir entry");
 #endif
 
@@ -2774,7 +2774,7 @@ EXPORT_SYMBOL(neigh_seq_stop);
 
 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
 {
-   struct neigh_table *tbl = seq->private;
+   struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
int cpu;
 
if (*pos == 0)
@@ -2791,7 +2791,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, 
loff_t *pos)
 
 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-   struct neigh_table *tbl = seq->private;
+   struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
int cpu;
 
for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -2810,7 +2810,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, 
void *v)
 
 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 {
-   struct neigh_table *tbl = seq->private;
+   struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
struct neigh_statistics *st = v;
 
if (v == SEQ_START_TOKEN) {
@@ -2849,25 +2849,6 @@ static const struct seq_operations neigh_stat_seq_ops = {
.stop   = neigh_stat_seq_stop,
.show   = neigh_stat_seq_show,
 };
-
-static int neigh_stat_seq_open(struct inode *inode, struct file *file)
-{
-   int ret = seq_open(file, &neigh_stat_seq_ops);
-
-   if (!ret) {
-   struct seq_file *sf = file->private_data;
-   sf->private = PDE_DATA(inode);
-   }
-   return ret;
-};
-
-static const struct file_operations neigh_stat_seq_fops = {
-   .open= neigh_stat_seq_open,
-   .read= seq_read,
-   .llseek  = seq_lseek,
-   .release = seq_release,
-};
-
 #endif /* CONFIG_PROC_FS */
 
 static inline size_t neigh_nlmsg_size(void)
-- 
2.17.0

[PATCH 30/39] netfilter/xt_hashlimit: switch to proc_create_{seq,single}_data

2018-04-19 Thread Christoph Hellwig

And use proc private data directly instead of doing a detour
through seq->private.

Signed-off-by: Christoph Hellwig 
---
 net/netfilter/xt_hashlimit.c | 92 +++-
 1 file changed, 18 insertions(+), 74 deletions(-)

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0cd73567e7ff..9b16402f29af 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -57,9 +57,9 @@ static inline struct hashlimit_net *hashlimit_pernet(struct 
net *net)
 }
 
 /* need to declare this at the top */
-static const struct file_operations dl_file_ops_v2;
-static const struct file_operations dl_file_ops_v1;
-static const struct file_operations dl_file_ops;
+static const struct seq_operations dl_seq_ops_v2;
+static const struct seq_operations dl_seq_ops_v1;
+static const struct seq_operations dl_seq_ops;
 
 /* hash table crap */
 struct dsthash_dst {
@@ -272,7 +272,7 @@ static int htable_create(struct net *net, struct 
hashlimit_cfg3 *cfg,
 {
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
-   const struct file_operations *fops;
+   const struct seq_operations *ops;
unsigned int size, i;
int ret;
 
@@ -321,19 +321,19 @@ static int htable_create(struct net *net, struct 
hashlimit_cfg3 *cfg,
 
switch (revision) {
case 1:
-   fops = &dl_file_ops_v1;
+   ops = &dl_seq_ops_v1;
break;
case 2:
-   fops = &dl_file_ops_v2;
+   ops = &dl_seq_ops_v2;
break;
default:
-   fops = &dl_file_ops;
+   ops = &dl_seq_ops;
}
 
-   hinfo->pde = proc_create_data(name, 0,
+   hinfo->pde = proc_create_seq_data(name, 0,
(family == NFPROTO_IPV4) ?
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-   fops, hinfo);
+   ops, hinfo);
if (hinfo->pde == NULL) {
kfree(hinfo->name);
vfree(hinfo);
@@ -1057,7 +1057,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = 
{
 static void *dl_seq_start(struct seq_file *s, loff_t *pos)
__acquires(htable->lock)
 {
-   struct xt_hashlimit_htable *htable = s->private;
+   struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket;
 
spin_lock_bh(&htable->lock);
@@ -1074,7 +1074,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
 
 static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
-   struct xt_hashlimit_htable *htable = s->private;
+   struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = v;
 
*pos = ++(*bucket);
@@ -1088,7 +1088,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, 
loff_t *pos)
 static void dl_seq_stop(struct seq_file *s, void *v)
__releases(htable->lock)
 {
-   struct xt_hashlimit_htable *htable = s->private;
+   struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = v;
 
if (!IS_ERR(bucket))
@@ -1130,7 +1130,7 @@ static void dl_seq_print(struct dsthash_ent *ent, 
u_int8_t family,
 static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
   struct seq_file *s)
 {
-   const struct xt_hashlimit_htable *ht = s->private;
+   struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
 
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1145,7 +1145,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, 
u_int8_t family,
 static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
   struct seq_file *s)
 {
-   const struct xt_hashlimit_htable *ht = s->private;
+   struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
 
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1160,7 +1160,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, 
u_int8_t family,
 static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
 {
-   const struct xt_hashlimit_htable *ht = s->private;
+   struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
 
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
@@ -1174,7 +1174,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, 
u_int8_t family,
 
 static int dl_seq_show_v2(struct seq_file *s, void *v)
 {
-   struct xt_hashlimit_htable *htable = s->private;
+   struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
 
@@ -1188,7 +1188,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v)
 
 static int dl_seq_s

[PATCH 35/39] isdn: replace ->proc_fops with ->proc_show

2018-04-19 Thread Christoph Hellwig

And switch to proc_create_single_data.

Signed-off-by: Christoph Hellwig 
---
 drivers/isdn/capi/kcapi.c  |  3 ++-
 drivers/isdn/gigaset/capi.c| 16 +---
 drivers/isdn/hardware/avm/avmcard.h|  4 ++--
 drivers/isdn/hardware/avm/b1.c | 17 ++---
 drivers/isdn/hardware/avm/b1dma.c  | 17 ++---
 drivers/isdn/hardware/avm/b1isa.c  |  2 +-
 drivers/isdn/hardware/avm/b1pci.c  |  4 ++--
 drivers/isdn/hardware/avm/b1pcmcia.c   |  2 +-
 drivers/isdn/hardware/avm/c4.c | 15 +--
 drivers/isdn/hardware/avm/t1isa.c  |  2 +-
 drivers/isdn/hardware/avm/t1pci.c  |  2 +-
 drivers/isdn/hardware/eicon/capimain.c | 15 +--
 drivers/isdn/hysdn/hycapi.c| 15 +--
 include/linux/isdn/capilli.h   |  2 +-
 net/bluetooth/cmtp/capi.c  | 14 +-
 15 files changed, 20 insertions(+), 110 deletions(-)

diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 46c189ad8d94..0ff517d3c98f 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -534,7 +534,8 @@ int attach_capi_ctr(struct capi_ctr *ctr)
init_waitqueue_head(&ctr->state_wait_queue);
 
sprintf(ctr->procfn, "capi/controllers/%d", ctr->cnr);
-   ctr->procent = proc_create_data(ctr->procfn, 0, NULL, ctr->proc_fops, 
ctr);
+   ctr->procent = proc_create_single_data(ctr->procfn, 0, NULL,
+   ctr->proc_show, ctr);
 
ncontrollers++;
 
diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c
index ccec7778cad2..dac5cd35e901 100644
--- a/drivers/isdn/gigaset/capi.c
+++ b/drivers/isdn/gigaset/capi.c
@@ -2437,19 +2437,6 @@ static int gigaset_proc_show(struct seq_file *m, void *v)
return 0;
 }
 
-static int gigaset_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, gigaset_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations gigaset_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = gigaset_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
 /**
  * gigaset_isdn_regdev() - register device to LL
  * @cs:device descriptor structure.
@@ -2478,8 +2465,7 @@ int gigaset_isdn_regdev(struct cardstate *cs, const char 
*isdnid)
iif->ctr.register_appl = gigaset_register_appl;
iif->ctr.release_appl  = gigaset_release_appl;
iif->ctr.send_message  = gigaset_send_message;
-   iif->ctr.procinfo  = gigaset_procinfo;
-   iif->ctr.proc_fops = &gigaset_proc_fops;
+   iif->ctr.proc_show = gigaset_proc_show,
INIT_LIST_HEAD(&iif->appls);
skb_queue_head_init(&iif->sendqueue);
atomic_set(&iif->sendqlen, 0);
diff --git a/drivers/isdn/hardware/avm/avmcard.h 
b/drivers/isdn/hardware/avm/avmcard.h
index c95712dbfa9f..cdfa89c71997 100644
--- a/drivers/isdn/hardware/avm/avmcard.h
+++ b/drivers/isdn/hardware/avm/avmcard.h
@@ -556,7 +556,7 @@ u16  b1_send_message(struct capi_ctr *ctrl, struct sk_buff 
*skb);
 void b1_parse_version(avmctrl_info *card);
 irqreturn_t b1_interrupt(int interrupt, void *devptr);
 
-extern const struct file_operations b1ctl_proc_fops;
+int b1_proc_show(struct seq_file *m, void *v);
 
 avmcard_dmainfo *avmcard_dma_alloc(char *name, struct pci_dev *,
   long rsize, long ssize);
@@ -576,6 +576,6 @@ void b1dma_register_appl(struct capi_ctr *ctrl,
 capi_register_params *rp);
 void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl);
 u16  b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb);
-extern const struct file_operations b1dmactl_proc_fops;
+int b1dma_proc_show(struct seq_file *m, void *v);
 
 #endif /* _AVMCARD_H_ */
diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c
index b1833d08a5fe..5ee5489d3f15 100644
--- a/drivers/isdn/hardware/avm/b1.c
+++ b/drivers/isdn/hardware/avm/b1.c
@@ -637,7 +637,7 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr)
 }
 
 /* - */
-static int b1ctl_proc_show(struct seq_file *m, void *v)
+int b1_proc_show(struct seq_file *m, void *v)
 {
struct capi_ctr *ctrl = m->private;
avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
@@ -699,20 +699,7 @@ static int b1ctl_proc_show(struct seq_file *m, void *v)
 
return 0;
 }
-
-static int b1ctl_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, b1ctl_proc_show, PDE_DATA(inode));
-}
-
-const struct file_operations b1ctl_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = b1ctl_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-EXPORT_SYMBOL(b1ctl_proc_fops);
+EXPORT_SYMBOL(b1_proc_show);

Re: [PATCH bpf-next v3 7/8] bpf: add documentation for eBPF helpers (51-57)

2018-04-19 Thread Daniel Borkmann

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions:
> 
> Helpers from Lawrence:
> - bpf_setsockopt()
> - bpf_getsockopt()
> - bpf_sock_ops_cb_flags_set()
> 
> Helpers from Yonghong:
> - bpf_perf_event_read_value()
> - bpf_perf_prog_read_value()
> 
> Helper from Josef:
> - bpf_override_return()
> 
> Helper from Andrey:
> - bpf_bind()
> 
> v3:
> - bpf_perf_event_read_value(): Fix time of selection for perf event type
>   in description. Remove occurrences of "cores" to avoid confusion with
>   "CPU".
> - bpf_bind(): Remove last paragraph of description, which was off topic.
> 
> Cc: Lawrence Brakmo 
> Cc: Yonghong Song 
> Cc: Josef Bacik 
> Cc: Andrey Ignatov 
> Signed-off-by: Quentin Monnet 
> 
> fix patch 7: Yonghong and Andrey
> ---
>  include/uapi/linux/bpf.h | 178 
> +++
>  1 file changed, 178 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index dd79a1c82adf..350459c583de 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1234,6 +1234,28 @@ union bpf_attr {
>   *   Return
>   *   0
>   *
> + * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int 
> optname, char *optval, int optlen)
> + *   Description
> + *   Emulate a call to **setsockopt()** on the socket associated to
> + *   *bpf_socket*, which must be a full socket. The *level* at
> + *   which the option resides and the name *optname* of the option
> + *   must be specified, see **setsockopt(2)** for more information.
> + *   The option value of length *optlen* is pointed to by *optval*.
> + *
> + *   This helper actually implements a subset of **setsockopt()**.
> + *   It supports the following *level*\ s:
> + *
> + *   * **SOL_SOCKET**, which supports the following *optname*\ s:
> + * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
> + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
> + *   * **IPPROTO_TCP**, which supports the following *optname*\ s:
> + * **TCP_CONGESTION**, **TCP_BPF_IW**,
> + * **TCP_BPF_SNDCWND_CLAMP**.
> + *   * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
> + *   * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
>   * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 
> flags)
>   *   Description
>   *   Grow or shrink the room for data in the packet associated to
> @@ -1281,6 +1303,162 @@ union bpf_attr {
>   *   performed again.
>   *   Return
>   *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct 
> bpf_perf_event_value *buf, u32 buf_size)
> + *   Description
> + *   Read the value of a perf event counter, and store it into *buf*
> + *   of size *buf_size*. This helper relies on a *map* of type
> + *   **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
> + *   counter is selected when *map* is updated with perf event file
> + *   descriptors. The *map* is an array whose size is the number of
> + *   available CPUs, and each cell contains a value relative to one
> + *   CPU. The value to retrieve is indicated by *flags*, that
> + *   contains the index of the CPU to look up, masked with
> + *   **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
> + *   **BPF_F_CURRENT_CPU** to indicate that the value for the
> + *   current CPU should be retrieved.
> + *
> + *   This helper behaves in a way close to
> + *   **bpf_perf_event_read**\ () helper, save that instead of
> + *   just returning the value observed, it fills the *buf*
> + *   structure. This allows for additional data to be retrieved: in
> + *   particular, the enabled and running times (in *buf*\
> + *   **->enabled** and *buf*\ **->running**, respectively) are
> + *   copied.

Since you mention bpf_perf_event_read() here, we should mention that
bpf_perf_event_read_value() is recommended over bpf_perf_event_read()
in general. The latter bpf_perf_event_read() has some ABI quirks where
error and counter value are used as a return code (which is obviously
wrong to do since ranges may overlap). bpf_perf_event_read_value()
fixed this but

[PATCH 34/39] atm: switch to proc_create_seq_private

2018-04-19 Thread Christoph Hellwig

And remove proc boilerplate code.

Signed-off-by: Christoph Hellwig 
---
 net/atm/proc.c | 72 +-
 1 file changed, 13 insertions(+), 59 deletions(-)

diff --git a/net/atm/proc.c b/net/atm/proc.c
index f272b0f59d82..0b0495a41bbe 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -68,7 +68,6 @@ static void atm_dev_info(struct seq_file *seq, const struct 
atm_dev *dev)
 struct vcc_state {
int bucket;
struct sock *sk;
-   int family;
 };
 
 static inline int compare_family(struct sock *sk, int family)
@@ -106,23 +105,13 @@ static int __vcc_walk(struct sock **sock, int family, int 
*bucket, loff_t l)
return (l < 0);
 }
 
-static inline void *vcc_walk(struct vcc_state *state, loff_t l)
+static inline void *vcc_walk(struct seq_file *seq, loff_t l)
 {
-   return __vcc_walk(&state->sk, state->family, &state->bucket, l) ?
-  state : NULL;
-}
-
-static int __vcc_seq_open(struct inode *inode, struct file *file,
-   int family, const struct seq_operations *ops)
-{
-   struct vcc_state *state;
-
-   state = __seq_open_private(file, ops, sizeof(*state));
-   if (state == NULL)
-   return -ENOMEM;
+   struct vcc_state *state = seq->private;
+   int family = (uintptr_t)(PDE_DATA(file_inode(seq->file)));
 
-   state->family = family;
-   return 0;
+   return __vcc_walk(&state->sk, family, &state->bucket, l) ?
+  state : NULL;
 }
 
 static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
@@ -133,7 +122,7 @@ static void *vcc_seq_start(struct seq_file *seq, loff_t 
*pos)
 
read_lock(&vcc_sklist_lock);
state->sk = SEQ_START_TOKEN;
-   return left ? vcc_walk(state, left) : SEQ_START_TOKEN;
+   return left ? vcc_walk(seq, left) : SEQ_START_TOKEN;
 }
 
 static void vcc_seq_stop(struct seq_file *seq, void *v)
@@ -144,9 +133,7 @@ static void vcc_seq_stop(struct seq_file *seq, void *v)
 
 static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-   struct vcc_state *state = seq->private;
-
-   v = vcc_walk(state, 1);
+   v = vcc_walk(seq, 1);
*pos += !!PTR_ERR(v);
return v;
 }
@@ -280,18 +267,6 @@ static const struct seq_operations pvc_seq_ops = {
.show   = pvc_seq_show,
 };
 
-static int pvc_seq_open(struct inode *inode, struct file *file)
-{
-   return __vcc_seq_open(inode, file, PF_ATMPVC, &pvc_seq_ops);
-}
-
-static const struct file_operations pvc_seq_fops = {
-   .open   = pvc_seq_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= seq_release_private,
-};
-
 static int vcc_seq_show(struct seq_file *seq, void *v)
 {
if (v == SEQ_START_TOKEN) {
@@ -314,18 +289,6 @@ static const struct seq_operations vcc_seq_ops = {
.show   = vcc_seq_show,
 };
 
-static int vcc_seq_open(struct inode *inode, struct file *file)
-{
-   return __vcc_seq_open(inode, file, 0, &vcc_seq_ops);
-}
-
-static const struct file_operations vcc_seq_fops = {
-   .open   = vcc_seq_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= seq_release_private,
-};
-
 static int svc_seq_show(struct seq_file *seq, void *v)
 {
static const char atm_svc_banner[] =
@@ -349,18 +312,6 @@ static const struct seq_operations svc_seq_ops = {
.show   = svc_seq_show,
 };
 
-static int svc_seq_open(struct inode *inode, struct file *file)
-{
-   return __vcc_seq_open(inode, file, PF_ATMSVC, &svc_seq_ops);
-}
-
-static const struct file_operations svc_seq_fops = {
-   .open   = svc_seq_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= seq_release_private,
-};
-
 static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
 size_t count, loff_t *pos)
 {
@@ -434,9 +385,12 @@ int __init atm_proc_init(void)
if (!atm_proc_root)
return -ENOMEM;
proc_create_seq("devices", 0444, atm_proc_root, &atm_dev_seq_ops);
-   proc_create("pvc", 0444, atm_proc_root, &pvc_seq_fops);
-   proc_create("svc", 0444, atm_proc_root, &svc_seq_fops);
-   proc_create("vc", 0444, atm_proc_root, &vcc_seq_fops);
+   proc_create_seq_private("pvc", 0444, atm_proc_root, &pvc_seq_ops,
+   sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMPVC);
+   proc_create_seq_private("svc", 0444, atm_proc_root, &svc_seq_ops,
+   sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMSVC);
+   proc_create_seq_private("vc", 0444, atm_proc_root, &vcc_seq_ops,
+   sizeof(struct vcc_state), NULL);
return 0;
 }
 
-- 
2.17.0

[PATCH 36/39] proc: don't detour through seq->private to get the inode

2018-04-19 Thread Christoph Hellwig

Signed-off-by: Christoph Hellwig 
---
 fs/proc/array.c | 18 --
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index ae2c807fd719..b34796b562ef 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -677,7 +677,7 @@ get_children_pid(struct inode *inode, struct pid *pid_prev, 
loff_t pos)
 
 static int children_seq_show(struct seq_file *seq, void *v)
 {
-   struct inode *inode = seq->private;
+   struct inode *inode = file_inode(seq->file);
pid_t pid;
 
pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
@@ -688,14 +688,14 @@ static int children_seq_show(struct seq_file *seq, void 
*v)
 
 static void *children_seq_start(struct seq_file *seq, loff_t *pos)
 {
-   return get_children_pid(seq->private, NULL, *pos);
+   return get_children_pid(file_inode(seq->file), NULL, *pos);
 }
 
 static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
struct pid *pid;
 
-   pid = get_children_pid(seq->private, v, *pos + 1);
+   pid = get_children_pid(file_inode(seq->file), v, *pos + 1);
put_pid(v);
 
++*pos;
@@ -716,17 +716,7 @@ static const struct seq_operations children_seq_ops = {
 
 static int children_seq_open(struct inode *inode, struct file *file)
 {
-   struct seq_file *m;
-   int ret;
-
-   ret = seq_open(file, &children_seq_ops);
-   if (ret)
-   return ret;
-
-   m = file->private_data;
-   m->private = inode;
-
-   return ret;
+   return seq_open(file, &children_seq_ops);
 }
 
 const struct file_operations proc_tid_children_operations = {
-- 
2.17.0

[PATCH 28/39] hostap: switch to proc_create_{seq,single}_data

2018-04-19 Thread Christoph Hellwig

And use proc private data directly instead of doing a detour
through seq->private.

Signed-off-by: Christoph Hellwig 
---
 .../net/wireless/intersil/hostap/hostap_ap.c  |  70 ++---
 .../net/wireless/intersil/hostap/hostap_hw.c  |  17 +--
 .../wireless/intersil/hostap/hostap_proc.c| 143 +++---
 3 files changed, 39 insertions(+), 191 deletions(-)

diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c 
b/drivers/net/wireless/intersil/hostap/hostap_ap.c
index 4f76f81dd3af..d1884b8913e7 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
@@ -69,7 +69,7 @@ static void prism2_send_mgmt(struct net_device *dev,
 #ifndef PRISM2_NO_PROCFS_DEBUG
 static int ap_debug_proc_show(struct seq_file *m, void *v)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
 
seq_printf(m, "BridgedUnicastFrames=%u\n", ap->bridged_unicast);
seq_printf(m, "BridgedMulticastFrames=%u\n", ap->bridged_multicast);
@@ -81,18 +81,6 @@ static int ap_debug_proc_show(struct seq_file *m, void *v)
seq_printf(m, "tx_drop_nonassoc=%u\n", ap->tx_drop_nonassoc);
return 0;
 }
-
-static int ap_debug_proc_open(struct inode *inode, struct file *file)
-{
-   return single_open(file, ap_debug_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ap_debug_proc_fops = {
-   .open   = ap_debug_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
 #endif /* PRISM2_NO_PROCFS_DEBUG */
 
 
@@ -333,7 +321,7 @@ void hostap_deauth_all_stas(struct net_device *dev, struct 
ap_data *ap,
 
 static int ap_control_proc_show(struct seq_file *m, void *v)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
char *policy_txt;
struct mac_entry *entry;
 
@@ -365,20 +353,20 @@ static int ap_control_proc_show(struct seq_file *m, void 
*v)
 
 static void *ap_control_proc_start(struct seq_file *m, loff_t *_pos)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_lock_bh(&ap->mac_restrictions.lock);
return seq_list_start_head(&ap->mac_restrictions.mac_list, *_pos);
 }
 
 static void *ap_control_proc_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
return seq_list_next(v, &ap->mac_restrictions.mac_list, _pos);
 }
 
 static void ap_control_proc_stop(struct seq_file *m, void *v)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_unlock_bh(&ap->mac_restrictions.lock);
 }
 
@@ -389,24 +377,6 @@ static const struct seq_operations ap_control_proc_seqops 
= {
.show   = ap_control_proc_show,
 };
 
-static int ap_control_proc_open(struct inode *inode, struct file *file)
-{
-   int ret = seq_open(file, &ap_control_proc_seqops);
-   if (ret == 0) {
-   struct seq_file *m = file->private_data;
-   m->private = PDE_DATA(inode);
-   }
-   return ret;
-}
-
-static const struct file_operations ap_control_proc_fops = {
-   .open   = ap_control_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= seq_release,
-};
-
-
 int ap_control_add_mac(struct mac_restrictions *mac_restrictions, u8 *mac)
 {
struct mac_entry *entry;
@@ -585,20 +555,20 @@ static int prism2_ap_proc_show(struct seq_file *m, void 
*v)
 
 static void *prism2_ap_proc_start(struct seq_file *m, loff_t *_pos)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_lock_bh(&ap->sta_table_lock);
return seq_list_start_head(&ap->sta_list, *_pos);
 }
 
 static void *prism2_ap_proc_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
return seq_list_next(v, &ap->sta_list, _pos);
 }
 
 static void prism2_ap_proc_stop(struct seq_file *m, void *v)
 {
-   struct ap_data *ap = m->private;
+   struct ap_data *ap = PDE_DATA(file_inode(m->file));
spin_unlock_bh(&ap->sta_table_lock);
 }
 
@@ -608,23 +578,6 @@ static const struct seq_operations prism2_ap_proc_seqops = 
{
.stop   = prism2_ap_proc_stop,
.show   = prism2_ap_proc_show,
 };
-
-static int prism2_ap_proc_open(struct inode *inode, struct file *file)
-{
-   int ret = seq_open(file, &prism2_ap_proc_seqops);
-   if (ret == 0) {
-   struct seq_file *m = file->private_data;
-   m->private = PDE_DATA(inode);
-   }
-   return ret;
-}
-
-static const struct file_operations prism2_ap_proc_fops = {
-   .open   = prism2_ap_proc_open,
-

[PATCH 32/39] bluetooth: switch to proc_create_seq_data

2018-04-19 Thread Christoph Hellwig

And use proc private data directly instead of doing a detour
through seq->private and private state.

Signed-off-by: Christoph Hellwig 
---
 net/bluetooth/af_bluetooth.c | 40 +---
 1 file changed, 5 insertions(+), 35 deletions(-)

diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 84d92a077834..3264e1873219 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -605,15 +605,10 @@ int bt_sock_wait_ready(struct sock *sk, unsigned long 
flags)
 EXPORT_SYMBOL(bt_sock_wait_ready);
 
 #ifdef CONFIG_PROC_FS
-struct bt_seq_state {
-   struct bt_sock_list *l;
-};
-
 static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(seq->private->l->lock)
 {
-   struct bt_seq_state *s = seq->private;
-   struct bt_sock_list *l = s->l;
+   struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
read_lock(&l->lock);
return seq_hlist_start_head(&l->head, *pos);
@@ -621,8 +616,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-   struct bt_seq_state *s = seq->private;
-   struct bt_sock_list *l = s->l;
+   struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
return seq_hlist_next(v, &l->head, pos);
 }
@@ -630,16 +624,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, 
loff_t *pos)
 static void bt_seq_stop(struct seq_file *seq, void *v)
__releases(seq->private->l->lock)
 {
-   struct bt_seq_state *s = seq->private;
-   struct bt_sock_list *l = s->l;
+   struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
read_unlock(&l->lock);
 }
 
 static int bt_seq_show(struct seq_file *seq, void *v)
 {
-   struct bt_seq_state *s = seq->private;
-   struct bt_sock_list *l = s->l;
+   struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
if (v == SEQ_START_TOKEN) {
seq_puts(seq ,"sk   RefCnt Rmem   Wmem   User   
Inode  Parent");
@@ -681,35 +673,13 @@ static const struct seq_operations bt_seq_ops = {
.show  = bt_seq_show,
 };
 
-static int bt_seq_open(struct inode *inode, struct file *file)
-{
-   struct bt_sock_list *sk_list;
-   struct bt_seq_state *s;
-
-   sk_list = PDE_DATA(inode);
-   s = __seq_open_private(file, &bt_seq_ops,
-  sizeof(struct bt_seq_state));
-   if (!s)
-   return -ENOMEM;
-
-   s->l = sk_list;
-   return 0;
-}
-
-static const struct file_operations bt_fops = {
-   .open = bt_seq_open,
-   .read = seq_read,
-   .llseek = seq_lseek,
-   .release = seq_release_private
-};
-
 int bt_procfs_init(struct net *net, const char *name,
   struct bt_sock_list *sk_list,
   int (* seq_show)(struct seq_file *, void *))
 {
sk_list->custom_seq_show = seq_show;
 
-   if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list))
+   if (!proc_create_seq_data(name, 0, net->proc_net, &bt_seq_ops, sk_list))
return -ENOMEM;
return 0;
 }
-- 
2.17.0

Re: [PATCH bpf-next v3 8/8] bpf: add documentation for eBPF helpers (58-64)

2018-04-19 Thread Quentin Monnet

2018-04-18 17:43 UTC+0200 ~ Jesper Dangaard Brouer 
> On Wed, 18 Apr 2018 15:09:41 +0100
> Quentin Monnet  wrote:
> 
>> 2018-04-18 15:34 UTC+0200 ~ Jesper Dangaard Brouer 
>>> On Tue, 17 Apr 2018 15:34:38 +0100
>>> Quentin Monnet  wrote:
>>>   
>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>>> index 350459c583de..3d329538498f 100644
>>>> --- a/include/uapi/linux/bpf.h
>>>> +++ b/include/uapi/linux/bpf.h
>>>> @@ -1276,6 +1276,50 @@ union bpf_attr {
>>>>   *Return
>>>>   *0 on success, or a negative error in case of failure.
>>>>   *
>>>> + * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
>>>> + *Description
>>>> + *Redirect the packet to the endpoint referenced by *map* 
>>>> at
>>>> + *index *key*. Depending on its type, his *map* can 
>>>> contain  
>>> ^^^
>>>
>>> "his" -> "this"  
>>
>> Thanks!
>>
>>>> + *references to net devices (for forwarding packets 
>>>> through other
>>>> + *ports), or to CPUs (for redirecting XDP frames to 
>>>> another CPU;
>>>> + *but this is only implemented for native XDP (with driver
>>>> + *support) as of this writing).
>>>> + *
>>>> + *All values for *flags* are reserved for future usage, 
>>>> and must
>>>> + *be left at zero.
>>>> + *Return
>>>> + ***XDP_REDIRECT** on success, or **XDP_ABORT** on error.
>>>> + *  
>>>
>>> "XDP_ABORT" -> "XDP_ABORTED"  
>>
>> Ouch. And I did the same for bpf_redirect(). Thanks for the catch.
>>
>>>
>>> I don't know if it's worth mentioning in the doc/man-page; that for XDP
>>> using bpf_redirect_map() is a HUGE performance advantage, compared to
>>> the bpf_redirect() call ?  
>>
>> It seems worth it to me. How would you simply explain the reason for this
>> difference?
> 
> The basic reason is "bulking effect", as devmap avoids the NIC
> tailptr/doorbell update on every packet... how to write that in a doc
> format?
> 
> I wrote about why XDP_REDIRECT with maps are smart here:
>  
> http://vger.kernel.org/netconf2017_files/XDP_devel_update_NetConf2017_Seoul.pdf
> 
> Using maps for redirect, hopefully makes XDP_REDIRECT the last driver
> XDP action code we need.  As new types of redirect can be introduced
> without driver changes. See that AF_XDP also uses a map.
> 
> It is more subtle, but maps also function as a sorting step. Imagine
> your XDP program needs to redirect out different interfaces (or CPUs in
> cpumap case), and packets arrive intermixed.  Packets get sorted into
> the different map indexes, and the xdp_do_flush_map() will trigger the
> flush operation.
> 
> 
> Happened to have an i40e NIC benchmark setup, and ran a single flow pktgen 
> test.
> 
> Results with 'xdp_redirect_map'
>  13589297 pps (13,589,297) 
> 
> Results with 'xdp_redirect' NOT using devmap:
>   7567575 pps (7,567,575)
> 
> Just to point out the performance benefit of devmap...


Thanks for those details! This is an impressive change in performance
indeed.

I think I will just keep it simple for the documentation. I will add the
following for bpf_redirect_map():

When used to redirect packets to net devices, this helper
provides a high performance increase over **bpf_redirect**\ ().
This is due to various implementation details of the underlying
mechanisms, one of which is the fact that **bpf_redirect_map**\ ()
tries to send packets in "bulk" to the device.

And also append the following to bpf_redirect():

The same effect can be attained with the more generic
**bpf_redirect_map**\ (), which requires specific maps
to be used but offers better performance.

Best,
Quentin

[PATCH 19/39] sg: simplify procfs code

2018-04-19 Thread Christoph Hellwig

Use remove_proc_subtree to remove the whole subtree on cleanup, and
unwind the registration loop into individual calls.  Switch to use
proc_create_seq where applicable.

Signed-off-by: Christoph Hellwig 
---
 drivers/scsi/sg.c | 124 +-
 1 file changed, 12 insertions(+), 112 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index c198b96368dd..8ff687158704 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -66,7 +66,6 @@ static int sg_version_num = 30536;/* 2 digits for each 
component */
 static char *sg_version_date = "20140603";
 
 static int sg_proc_init(void);
-static void sg_proc_cleanup(void);
 #endif
 
 #define SG_ALLOW_DIO_DEF 0
@@ -1661,7 +1660,7 @@ static void __exit
 exit_sg(void)
 {
 #ifdef CONFIG_SCSI_PROC_FS
-   sg_proc_cleanup();
+   remove_proc_subtree("scsi/sg", NULL);
 #endif /* CONFIG_SCSI_PROC_FS */
scsi_unregister_interface(&sg_interface);
class_destroy(sg_sysfs_class);
@@ -2274,11 +2273,6 @@ sg_get_dev(int dev)
 }
 
 #ifdef CONFIG_SCSI_PROC_FS
-
-static struct proc_dir_entry *sg_proc_sgp = NULL;
-
-static char sg_proc_sg_dirname[] = "scsi/sg";
-
 static int sg_proc_seq_show_int(struct seq_file *s, void *v);
 
 static int sg_proc_single_open_adio(struct inode *inode, struct file *file);
@@ -2306,37 +2300,11 @@ static const struct file_operations dressz_fops = {
 };
 
 static int sg_proc_seq_show_version(struct seq_file *s, void *v);
-static int sg_proc_single_open_version(struct inode *inode, struct file *file);
-static const struct file_operations version_fops = {
-   .owner = THIS_MODULE,
-   .open = sg_proc_single_open_version,
-   .read = seq_read,
-   .llseek = seq_lseek,
-   .release = single_release,
-};
-
 static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v);
-static int sg_proc_single_open_devhdr(struct inode *inode, struct file *file);
-static const struct file_operations devhdr_fops = {
-   .owner = THIS_MODULE,
-   .open = sg_proc_single_open_devhdr,
-   .read = seq_read,
-   .llseek = seq_lseek,
-   .release = single_release,
-};
-
 static int sg_proc_seq_show_dev(struct seq_file *s, void *v);
-static int sg_proc_open_dev(struct inode *inode, struct file *file);
 static void * dev_seq_start(struct seq_file *s, loff_t *pos);
 static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos);
 static void dev_seq_stop(struct seq_file *s, void *v);
-static const struct file_operations dev_fops = {
-   .owner = THIS_MODULE,
-   .open = sg_proc_open_dev,
-   .read = seq_read,
-   .llseek = seq_lseek,
-   .release = seq_release,
-};
 static const struct seq_operations dev_seq_ops = {
.start = dev_seq_start,
.next  = dev_seq_next,
@@ -2345,14 +2313,6 @@ static const struct seq_operations dev_seq_ops = {
 };
 
 static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v);
-static int sg_proc_open_devstrs(struct inode *inode, struct file *file);
-static const struct file_operations devstrs_fops = {
-   .owner = THIS_MODULE,
-   .open = sg_proc_open_devstrs,
-   .read = seq_read,
-   .llseek = seq_lseek,
-   .release = seq_release,
-};
 static const struct seq_operations devstrs_seq_ops = {
.start = dev_seq_start,
.next  = dev_seq_next,
@@ -2361,14 +2321,6 @@ static const struct seq_operations devstrs_seq_ops = {
 };
 
 static int sg_proc_seq_show_debug(struct seq_file *s, void *v);
-static int sg_proc_open_debug(struct inode *inode, struct file *file);
-static const struct file_operations debug_fops = {
-   .owner = THIS_MODULE,
-   .open = sg_proc_open_debug,
-   .read = seq_read,
-   .llseek = seq_lseek,
-   .release = seq_release,
-};
 static const struct seq_operations debug_seq_ops = {
.start = dev_seq_start,
.next  = dev_seq_next,
@@ -2376,50 +2328,23 @@ static const struct seq_operations debug_seq_ops = {
.show  = sg_proc_seq_show_debug,
 };
 
-
-struct sg_proc_leaf {
-   const char * name;
-   const struct file_operations * fops;
-};
-
-static const struct sg_proc_leaf sg_proc_leaf_arr[] = {
-   {"allow_dio", &adio_fops},
-   {"debug", &debug_fops},
-   {"def_reserved_size", &dressz_fops},
-   {"device_hdr", &devhdr_fops},
-   {"devices", &dev_fops},
-   {"device_strs", &devstrs_fops},
-   {"version", &version_fops}
-};
-
 static int
 sg_proc_init(void)
 {
-   int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
-   int k;
+   struct proc_dir_entry *p;
 
-   sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL);
-   if (!sg_proc_sgp)
+   p = proc_mkdir("scsi/sg", NULL);
+   if (!p)
return 1;
-   for (k = 0; k < num_leaves; ++k) {
-   const struct sg_proc_leaf *leaf = &sg_proc_leaf_arr[k];
-   umode_t mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO;
-   proc_create(leaf->n

[PATCH net-next 3/4] geneve: check MTU for a minimum in geneve_change_mtu()

2018-04-19 Thread Alexey Kodanev

geneve_change_mtu() will be used not only as ndo_change_mtu() callback,
but also to verify a user specified MTU on a new link creation in the
next patch.

Signed-off-by: Alexey Kodanev 
---
 drivers/net/geneve.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b650f84..ae649f6 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -942,11 +942,10 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, 
struct net_device *dev)
 
 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
 {
-   /* Only possible if called internally, ndo_change_mtu path's new_mtu
-* is guaranteed to be between dev->min_mtu and dev->max_mtu.
-*/
if (new_mtu > dev->max_mtu)
new_mtu = dev->max_mtu;
+   else if (new_mtu < dev->min_mtu)
+   new_mtu = dev->min_mtu;
 
dev->mtu = new_mtu;
return 0;
-- 
1.8.3.1

[PATCH 33/39] atm: simplify procfs code

2018-04-19 Thread Christoph Hellwig

Use remove_proc_subtree to remove the whole subtree on cleanup, and
unwind the registration loop into individual calls.  Switch to use
proc_create_seq where applicable.

Signed-off-by: Christoph Hellwig 
---
 net/atm/proc.c | 65 ++
 1 file changed, 7 insertions(+), 58 deletions(-)

diff --git a/net/atm/proc.c b/net/atm/proc.c
index 55410c00c7e2..f272b0f59d82 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -257,18 +257,6 @@ static const struct seq_operations atm_dev_seq_ops = {
.show   = atm_dev_seq_show,
 };
 
-static int atm_dev_seq_open(struct inode *inode, struct file *file)
-{
-   return seq_open(file, &atm_dev_seq_ops);
-}
-
-static const struct file_operations devices_seq_fops = {
-   .open   = atm_dev_seq_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= seq_release,
-};
-
 static int pvc_seq_show(struct seq_file *seq, void *v)
 {
static char atm_pvc_banner[] =
@@ -440,58 +428,19 @@ void atm_proc_dev_deregister(struct atm_dev *dev)
kfree(dev->proc_name);
 }
 
-static struct atm_proc_entry {
-   char *name;
-   const struct file_operations *proc_fops;
-   struct proc_dir_entry *dirent;
-} atm_proc_ents[] = {
-   { .name = "devices",.proc_fops = &devices_seq_fops },
-   { .name = "pvc",.proc_fops = &pvc_seq_fops },
-   { .name = "svc",.proc_fops = &svc_seq_fops },
-   { .name = "vc", .proc_fops = &vcc_seq_fops },
-   { .name = NULL, .proc_fops = NULL }
-};
-
-static void atm_proc_dirs_remove(void)
-{
-   static struct atm_proc_entry *e;
-
-   for (e = atm_proc_ents; e->name; e++) {
-   if (e->dirent)
-   remove_proc_entry(e->name, atm_proc_root);
-   }
-   remove_proc_entry("atm", init_net.proc_net);
-}
-
 int __init atm_proc_init(void)
 {
-   static struct atm_proc_entry *e;
-   int ret;
-
atm_proc_root = proc_net_mkdir(&init_net, "atm", init_net.proc_net);
if (!atm_proc_root)
-   goto err_out;
-   for (e = atm_proc_ents; e->name; e++) {
-   struct proc_dir_entry *dirent;
-
-   dirent = proc_create(e->name, 0444,
-atm_proc_root, e->proc_fops);
-   if (!dirent)
-   goto err_out_remove;
-   e->dirent = dirent;
-   }
-   ret = 0;
-out:
-   return ret;
-
-err_out_remove:
-   atm_proc_dirs_remove();
-err_out:
-   ret = -ENOMEM;
-   goto out;
+   return -ENOMEM;
+   proc_create_seq("devices", 0444, atm_proc_root, &atm_dev_seq_ops);
+   proc_create("pvc", 0444, atm_proc_root, &pvc_seq_fops);
+   proc_create("svc", 0444, atm_proc_root, &svc_seq_fops);
+   proc_create("vc", 0444, atm_proc_root, &vcc_seq_fops);
+   return 0;
 }
 
 void atm_proc_exit(void)
 {
-   atm_proc_dirs_remove();
+   remove_proc_subtree("atm", init_net.proc_net);
 }
-- 
2.17.0

[PATCH net-next 4/4] geneve: configure MTU based on a lower device

2018-04-19 Thread Alexey Kodanev

Currently, on new link creation or when the 'remote' address parameter
is updated, the MTU is not changed and always equals 1500. When the lower
device has a larger MTU, this might not be efficient, e.g. for UDP, and
requires manual MTU adjustments to match the MTU of the lower
device.

This patch tries to automate this process, finds a lower device using
the 'remote' address parameter, then uses its MTU to tune GENEVE's MTU:
  * on a new link creation
  * when 'remote' parameter is changed

Also with this patch, the MTU from a user, on new link creation, is
passed to geneve_change_mtu() where it is verified, and the MTU adjustment
based on a lower device is skipped in that case. Prior to this change, it
was possible to set invalid MTU values on new link creation.

Signed-off-by: Alexey Kodanev 
---
 drivers/net/geneve.c | 56 +---
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index ae649f6..750eaa5 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1387,6 +1387,48 @@ static int geneve_nl2info(struct nlattr *tb[], struct 
nlattr *data[],
return -EOPNOTSUPP;
 }
 
+static void geneve_link_config(struct net_device *dev,
+  struct ip_tunnel_info *info, struct nlattr *tb[])
+{
+   struct geneve_dev *geneve = netdev_priv(dev);
+   int ldev_mtu = 0;
+
+   if (tb[IFLA_MTU]) {
+   geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+   return;
+   }
+
+   switch (ip_tunnel_info_af(info)) {
+   case AF_INET: {
+   struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
+   struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
+
+   if (!IS_ERR(rt) && rt->dst.dev) {
+   ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
+   ip_rt_put(rt);
+   }
+   break;
+   }
+#if IS_ENABLED(CONFIG_IPV6)
+   case AF_INET6: {
+   struct rt6_info *rt = rt6_lookup(geneve->net,
+&info->key.u.ipv6.dst, NULL, 0,
+NULL, 0);
+
+   if (rt && rt->dst.dev)
+   ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
+   ip6_rt_put(rt);
+   break;
+   }
+#endif
+   }
+
+   if (ldev_mtu <= 0)
+   return;
+
+   geneve_change_mtu(dev, ldev_mtu - info->options_len);
+}
+
 static int geneve_newlink(struct net *net, struct net_device *dev,
  struct nlattr *tb[], struct nlattr *data[],
  struct netlink_ext_ack *extack)
@@ -1402,8 +1444,14 @@ static int geneve_newlink(struct net *net, struct 
net_device *dev,
if (err)
return err;
 
-   return geneve_configure(net, dev, extack, &info, metadata,
-   use_udp6_rx_checksums);
+   err = geneve_configure(net, dev, extack, &info, metadata,
+  use_udp6_rx_checksums);
+   if (err)
+   return err;
+
+   geneve_link_config(dev, &info, tb);
+
+   return 0;
 }
 
 /* Quiesces the geneve device data path for both TX and RX.
@@ -1477,8 +1525,10 @@ static int geneve_changelink(struct net_device *dev, 
struct nlattr *tb[],
if (err)
return err;
 
-   if (!geneve_dst_addr_equal(&geneve->info, &info))
+   if (!geneve_dst_addr_equal(&geneve->info, &info)) {
dst_cache_reset(&info.dst_cache);
+   geneve_link_config(dev, &info, tb);
+   }
 
geneve_quiesce(geneve, &gs4, &gs6);
geneve->info = info;
-- 
1.8.3.1

[PATCH net-next 1/4] geneve: remove white-space before '#if IS_ENABLED(CONFIG_IPV6)'

2018-04-19 Thread Alexey Kodanev

Signed-off-by: Alexey Kodanev 
---
 drivers/net/geneve.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b919e89..45acdc9 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1261,7 +1261,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct 
nlattr *data[],
}
 
if (data[IFLA_GENEVE_REMOTE6]) {
- #if IS_ENABLED(CONFIG_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
attrtype = IFLA_GENEVE_REMOTE6;
goto change_notsup;
-- 
1.8.3.1

[PATCH net-next 0/4] geneve: verify user specified MTU or adjust with a lower device

2018-04-19 Thread Alexey Kodanev

The first two patches don't introduce any functional changes and
contain minor cleanups for code readability.

The last one adds a new function geneve_link_config() similar to the
other tunnels. The function will be used on a new link creation or
when 'remote' parameter is changed. It adjusts a user specified MTU
or, if it finds a lower device, tunes the tunnel MTU using it.

Alexey Kodanev (4):
  geneve: remove white-space before '#if IS_ENABLED(CONFIG_IPV6)'
  geneve: cleanup hard coded value for Ethernet header length
  geneve: check MTU for a minimum in geneve_change_mtu()
  geneve: configure MTU based on a lower device

 drivers/net/geneve.c | 72 
 1 file changed, 61 insertions(+), 11 deletions(-)

-- 
1.8.3.1

Re: [PATCH] net: phy: TLK10X initial driver submission

2018-04-19 Thread Miguel Ojeda

On Thu, Apr 19, 2018 at 10:28 AM, Måns Andersson  wrote:
> From: Mans Andersson 
>
> Add support for the TI TLK105 and TLK106 10/100Mbit ethernet phys.
>

Hi Mans,

Some quick notes.

> In addition the TLK10X needs to be removed from DP83848 driver as the
> power back off support is added here for this device.
>
> Datasheet:
> http://www.ti.com/lit/gpn/tlk106

Missing signature.

> ---
>  .../devicetree/bindings/net/ti,tlk10x.txt  |  27 +++
>  drivers/net/phy/Kconfig|   5 +
>  drivers/net/phy/Makefile   |   1 +
>  drivers/net/phy/dp83848.c  |   3 -
>  drivers/net/phy/tlk10x.c   | 209 
> +
>  5 files changed, 242 insertions(+), 3 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/net/ti,tlk10x.txt
>  create mode 100644 drivers/net/phy/tlk10x.c
>
> diff --git a/Documentation/devicetree/bindings/net/ti,tlk10x.txt 
> b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> new file mode 100644
> index 000..371d0d7
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/ti,tlk10x.txt
> @@ -0,0 +1,27 @@
> +* Texas Instruments - TLK105 / TLK106 ethernet PHYs
> +
> +Required properties:
> +   - reg - The ID number for the phy, usually a small integer
> +
> +Optional properties:
> +   - ti,power-back-off - Power Back Off Level
> +   Please refer to data sheet chapter 8.6 and TI Application
> +   Note SLLA328
> +   0 - Normal Operation
> +   1 - Level 1 (up to 140m cable between TLK link partners)
> +   2 - Level 2 (up to 100m cable between TLK link partners)
> +   3 - Level 3 (up to 80m cable between TLK link partners)
> +
> +Default child nodes are standard Ethernet PHY device
> +nodes as described in Documentation/devicetree/bindings/net/phy.txt
> +
> +Example:
> +
> +   ethernet-phy@0 {
> +   reg = <0>;
> +   ti,power-back-off = <2>;
> +   };
> +
> +Datasheets and documentation can be found at:
> +http://www.ti.com/lit/gpn/tlk106
> +http://www.ti.com/lit/an/slla328/slla328.pdf
> diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
> index bdfbabb..c980240 100644
> --- a/drivers/net/phy/Kconfig
> +++ b/drivers/net/phy/Kconfig
> @@ -295,6 +295,11 @@ config DP83867_PHY
> ---help---
>   Currently supports the DP83867 PHY.
>
> +config TLK10X_PHY
> +   tristate "Texas Instruments TLK10x PHY"
> +   ---help---
> + Supports the TLK105 and TLK106 PHYs.
> +
>  config FIXED_PHY
> tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
> depends on PHYLIB
> diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
> index 01acbcb..37e4e02 100644
> --- a/drivers/net/phy/Makefile
> +++ b/drivers/net/phy/Makefile
> @@ -79,5 +79,6 @@ obj-$(CONFIG_ROCKCHIP_PHY)+= rockchip.o
>  obj-$(CONFIG_SMSC_PHY) += smsc.o
>  obj-$(CONFIG_STE10XP)  += ste10Xp.o
>  obj-$(CONFIG_TERANETICS_PHY)   += teranetics.o
> +obj-$(CONFIG_TLK10X_PHY)   += tlk10x.o
>  obj-$(CONFIG_VITESSE_PHY)  += vitesse.o
>  obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
> diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
> index cd09c3a..435f401 100644
> --- a/drivers/net/phy/dp83848.c
> +++ b/drivers/net/phy/dp83848.c
> @@ -19,7 +19,6 @@
>  #define TI_DP83848C_PHY_ID 0x20005ca0
>  #define TI_DP83620_PHY_ID  0x20005ce0
>  #define NS_DP83848C_PHY_ID 0x20005c90
> -#define TLK10X_PHY_ID  0x2000a210
>
>  /* Registers */
>  #define DP83848_MICR   0x11 /* MII Interrupt Control 
> Register */
> @@ -78,7 +77,6 @@ static struct mdio_device_id __maybe_unused dp83848_tbl[] = 
> {
> { TI_DP83848C_PHY_ID, 0xfff0 },
> { NS_DP83848C_PHY_ID, 0xfff0 },
> { TI_DP83620_PHY_ID, 0xfff0 },
> -   { TLK10X_PHY_ID, 0xfff0 },
> { }
>  };
>  MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
> @@ -105,7 +103,6 @@ static struct phy_driver dp83848_driver[] = {
> DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
> DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"),
> DP83848_PHY_DRIVER(TI_DP83620_PHY_ID, "TI DP83620 10/100 Mbps PHY"),
> -   DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"),
>  };
>  module_phy_driver(dp83848_driver);
>
> diff --git a/drivers/net/phy/tlk10x.c b/drivers/net/phy/tlk10x.c
> new file mode 100644
> index 000..1efc81e
> --- /dev/null
> +++ b/drivers/net/phy/tlk10x.c
> @@ -0,0 +1,209 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/**
> + * Driver for the Texas Instruments TLK105 / TLK106
> + *
> + * Copyright (C) 2018 NIBE Industrier AB - http://www.nibe.com
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as publi

Re: [PATCH bpf-next v3 6/8] bpf: add documentation for eBPF helpers (42-50)

2018-04-19 Thread Daniel Borkmann

On 04/17/2018 04:34 PM, Quentin Monnet wrote:
> Add documentation for eBPF helper functions to bpf.h user header file.
> This documentation can be parsed with the Python script provided in
> another commit of the patch series, in order to provide a RST document
> that can later be converted into a man page.
> 
> The objective is to make the documentation easily understandable and
> accessible to all eBPF developers, including beginners.
> 
> This patch contains descriptions for the following helper functions:
> 
> Helper from Kaixu:
> - bpf_perf_event_read()
> 
> Helpers from Martin:
> - bpf_skb_under_cgroup()
> - bpf_xdp_adjust_head()
> 
> Helpers from Sargun:
> - bpf_probe_write_user()
> - bpf_current_task_under_cgroup()
> 
> Helper from Thomas:
> - bpf_skb_change_head()
> 
> Helper from Gianluca:
> - bpf_probe_read_str()
> 
> Helpers from Chenbo:
> - bpf_get_socket_cookie()
> - bpf_get_socket_uid()
> 
> v3:
> - bpf_perf_event_read(): Fix time of selection for perf event type in
>   description. Remove occurrences of "cores" to avoid confusion with
>   "CPU".
> 
> Cc: Kaixu Xia 
> Cc: Martin KaFai Lau 
> Cc: Sargun Dhillon 
> Cc: Thomas Graf 
> Cc: Gianluca Borello 
> Cc: Chenbo Feng 
> Signed-off-by: Quentin Monnet 
> ---
>  include/uapi/linux/bpf.h | 158 
> +++
>  1 file changed, 158 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3a40f5debac2..dd79a1c82adf 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -753,6 +753,25 @@ union bpf_attr {
>   *   Return
>   *   0 on success, or a negative error in case of failure.
>   *
> + * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
> + *   Description
> + *   Read the value of a perf event counter. This helper relies on a
> + *   *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
> + *   the perf event counter is selected when *map* is updated with
> + *   perf event file descriptors. The *map* is an array whose size
> + *   is the number of available CPUs, and each cell contains a value
> + *   relative to one CPU. The value to retrieve is indicated by
> + *   *flags*, that contains the index of the CPU to look up, masked
> + *   with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
> + *   **BPF_F_CURRENT_CPU** to indicate that the value for the
> + *   current CPU should be retrieved.
> + *
> + *   Note that before Linux 4.13, only hardware perf event can be
> + *   retrieved.
> + *   Return
> + *   The value of the perf event counter read from the map, or a
> + *   negative error code in case of failure.
> + *
>   * int bpf_redirect(u32 ifindex, u64 flags)
>   *   Description
>   *   Redirect the packet to another net device of index *ifindex*.
> @@ -965,6 +984,17 @@ union bpf_attr {
>   *   Return
>   *   0 on success, or a negative error in case of failure.
>   *
> + * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 
> index)
> + *   Description
> + *   Check whether *skb* is a descendant of the cgroup2 held by
> + *   *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
> + *   Return
> + *   The return value depends on the result of the test, and can be:
> + *
> + *   * 0, if the *skb* failed the cgroup2 descendant test.
> + *   * 1, if the *skb* succeeded the cgroup2 descendant test.
> + *   * A negative error code, if an error occurred.
> + *
>   * u32 bpf_get_hash_recalc(struct sk_buff *skb)
>   *   Description
>   *   Retrieve the hash of the packet, *skb*\ **->hash**. If it is
> @@ -985,6 +1015,37 @@ union bpf_attr {
>   *   Return
>   *   A pointer to the current task struct.
>   *
> + * int bpf_probe_write_user(void *dst, const void *src, u32 len)
> + *   Description
> + *   Attempt in a safe way to write *len* bytes from the buffer
> + *   *src* to *dst* in memory. It only works for threads that are in
> + *   user context.

Plus the dst address must be a valid user space address.

> + *   This helper should not be used to implement any kind of
> + *   security mechanism because of TOC-TOU attacks, but rather to
> + *   debug, divert, and manipulate execution of semi-cooperative
> + *   processes.
> + *
> + *   Keep in mind that this feature is meant for experiments, and it
> + *   has a risk of crashing the system and running programs.

Ditto, crashing user space applications.

> + *   Therefore, when an eBPF program using this helper is attached,
> + *   a warning including PID and process name is printed to kernel
> + *   logs.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_current_task_under_cgroup(struct bpf_map *map, u32

[PATCH 37/39] ide: remove ide_driver_proc_write

2018-04-19 Thread Christoph Hellwig

The driver proc file hasn't been writeable for a long time, so this is
just dead code.

Signed-off-by: Christoph Hellwig 
---
 drivers/ide/ide-proc.c | 46 --
 1 file changed, 46 deletions(-)

diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 863db44c7916..b3b8b8822d6a 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -528,58 +528,12 @@ static int ide_driver_proc_open(struct inode *inode, 
struct file *file)
return single_open(file, ide_driver_proc_show, PDE_DATA(inode));
 }
 
-static int ide_replace_subdriver(ide_drive_t *drive, const char *driver)
-{
-   struct device *dev = &drive->gendev;
-   int ret = 1;
-   int err;
-
-   device_release_driver(dev);
-   /* FIXME: device can still be in use by previous driver */
-   strlcpy(drive->driver_req, driver, sizeof(drive->driver_req));
-   err = device_attach(dev);
-   if (err < 0)
-   printk(KERN_WARNING "IDE: %s: device_attach error: %d\n",
-   __func__, err);
-   drive->driver_req[0] = 0;
-   if (dev->driver == NULL) {
-   err = device_attach(dev);
-   if (err < 0)
-   printk(KERN_WARNING
-   "IDE: %s: device_attach(2) error: %d\n",
-   __func__, err);
-   }
-   if (dev->driver && !strcmp(dev->driver->name, driver))
-   ret = 0;
-
-   return ret;
-}
-
-static ssize_t ide_driver_proc_write(struct file *file, const char __user 
*buffer,
-size_t count, loff_t *pos)
-{
-   ide_drive_t *drive = PDE_DATA(file_inode(file));
-   char name[32];
-
-   if (!capable(CAP_SYS_ADMIN))
-   return -EACCES;
-   if (count > 31)
-   count = 31;
-   if (copy_from_user(name, buffer, count))
-   return -EFAULT;
-   name[count] = '\0';
-   if (ide_replace_subdriver(drive, name))
-   return -EINVAL;
-   return count;
-}
-
 static const struct file_operations ide_driver_proc_fops = {
.owner  = THIS_MODULE,
.open   = ide_driver_proc_open,
.read   = seq_read,
.llseek = seq_lseek,
.release= single_release,
-   .write  = ide_driver_proc_write,
 };
 
 static int ide_media_proc_show(struct seq_file *m, void *v)
-- 
2.17.0

[PATCH 25/39] drbd: switch to proc_create_single

2018-04-19 Thread Christoph Hellwig

And stop messing with try_module_get on THIS_MODULE, which doesn't make
any sense here.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/drbd/drbd_int.h  |  2 +-
 drivers/block/drbd/drbd_main.c |  3 ++-
 drivers/block/drbd/drbd_proc.c | 34 +-
 3 files changed, 4 insertions(+), 35 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 06ecee1b528e..461ddec04e7c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1643,7 +1643,7 @@ void drbd_bump_write_ordering(struct drbd_resource 
*resource, struct drbd_backin
 
 /* drbd_proc.c */
 extern struct proc_dir_entry *drbd_proc;
-extern const struct file_operations drbd_proc_fops;
+int drbd_seq_show(struct seq_file *seq, void *v);
 
 /* drbd_actlog.c */
 extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct 
drbd_interval *i);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 185f1ef00a7c..c2d154faac02 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3010,7 +3010,8 @@ static int __init drbd_init(void)
goto fail;
 
err = -ENOMEM;
-   drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, 
&drbd_proc_fops, NULL);
+   drbd_proc = proc_create_single("drbd", S_IFREG | S_IRUGO , NULL,
+   drbd_seq_show);
if (!drbd_proc) {
pr_err("unable to register proc file\n");
goto fail;
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 582caeb0de86..74ef29247bb5 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -33,18 +33,7 @@
 #include 
 #include "drbd_int.h"
 
-static int drbd_proc_open(struct inode *inode, struct file *file);
-static int drbd_proc_release(struct inode *inode, struct file *file);
-
-
 struct proc_dir_entry *drbd_proc;
-const struct file_operations drbd_proc_fops = {
-   .owner  = THIS_MODULE,
-   .open   = drbd_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= drbd_proc_release,
-};
 
 static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
 {
@@ -235,7 +224,7 @@ static void drbd_syncer_progress(struct drbd_device 
*device, struct seq_file *se
}
 }
 
-static int drbd_seq_show(struct seq_file *seq, void *v)
+int drbd_seq_show(struct seq_file *seq, void *v)
 {
int i, prev_i = -1;
const char *sn;
@@ -345,24 +334,3 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 
return 0;
 }
-
-static int drbd_proc_open(struct inode *inode, struct file *file)
-{
-   int err;
-
-   if (try_module_get(THIS_MODULE)) {
-   err = single_open(file, drbd_seq_show, NULL);
-   if (err)
-   module_put(THIS_MODULE);
-   return err;
-   }
-   return -ENODEV;
-}
-
-static int drbd_proc_release(struct inode *inode, struct file *file)
-{
-   module_put(THIS_MODULE);
-   return single_release(inode, file);
-}
-
-/* PROC FS stuff end */
-- 
2.17.0

[PATCH net-next 1/2] qed* : use trust mode to allow VF to override forced MAC

2018-04-19 Thread Shahed Shaikh

As per existing behavior, when the PF sets a MAC address for a VF
(also called a forced MAC), the VF is not allowed to change its
MAC address afterwards.
This limits a few use cases, such as bonding of VFs,
where the bonding driver asks the VF to change its MAC address.

This patch uses VF trust mode to allow a VF to change its MAC address
even though the PF has set a forced MAC for that VF.

Signed-off-by: Shahed Shaikh 
---
 drivers/net/ethernet/qlogic/qed/qed_sriov.c| 210 +++--
 drivers/net/ethernet/qlogic/qede/qede_filter.c |   3 +-
 2 files changed, 195 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c 
b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 5acb91b..77376fd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -48,7 +48,7 @@ static int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
   u8 opcode,
   __le16 echo,
   union event_ring_data *data, u8 fw_return_code);
-
+static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int 
vfid);
 
 static u8 qed_vf_calculate_legacy(struct qed_vf_info *p_vf)
 {
@@ -1790,7 +1790,8 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn 
*p_hwfn,
if (!p_vf->vport_instance)
return -EINVAL;
 
-   if (events & BIT(MAC_ADDR_FORCED)) {
+   if ((events & BIT(MAC_ADDR_FORCED)) ||
+   p_vf->p_vf_info.is_trusted_configured) {
/* Since there's no way [currently] of removing the MAC,
 * we can always assume this means we need to force it.
 */
@@ -1809,8 +1810,12 @@ static int qed_iov_configure_vport_forced(struct 
qed_hwfn *p_hwfn,
  "PF failed to configure MAC for VF\n");
return rc;
}
-
-   p_vf->configured_features |= 1 << MAC_ADDR_FORCED;
+   if (p_vf->p_vf_info.is_trusted_configured)
+   p_vf->configured_features |=
+   BIT(VFPF_BULLETIN_MAC_ADDR);
+   else
+   p_vf->configured_features |=
+   BIT(MAC_ADDR_FORCED);
}
 
if (events & BIT(VLAN_ADDR_FORCED)) {
@@ -3170,6 +3175,10 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn 
*p_hwfn,
if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))
return 0;
 
+   /* Don't keep track of shadow copy since we don't intend to restore. */
+   if (p_vf->p_vf_info.is_trusted_configured)
+   return 0;
+
/* First remove entries and then add new ones */
if (p_params->opcode == QED_FILTER_REMOVE) {
for (i = 0; i < QED_ETH_VF_NUM_MAC_FILTERS; i++) {
@@ -3244,9 +3253,17 @@ static int qed_iov_chk_ucast(struct qed_hwfn *hwfn,
 
/* No real decision to make; Store the configured MAC */
if (params->type == QED_FILTER_MAC ||
-   params->type == QED_FILTER_MAC_VLAN)
+   params->type == QED_FILTER_MAC_VLAN) {
ether_addr_copy(vf->mac, params->mac);
 
+   if (vf->is_trusted_configured) {
+   qed_iov_bulletin_set_mac(hwfn, vf->mac, vfid);
+
+   /* Update and post bulleitin again */
+   qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
+   }
+   }
+
return 0;
 }
 
@@ -4081,16 +4098,60 @@ static void qed_iov_bulletin_set_forced_mac(struct 
qed_hwfn *p_hwfn,
return;
}
 
-   feature = 1 << MAC_ADDR_FORCED;
+   if (vf_info->p_vf_info.is_trusted_configured) {
+   feature = BIT(VFPF_BULLETIN_MAC_ADDR);
+   /* Trust mode will disable Forced MAC */
+   vf_info->bulletin.p_virt->valid_bitmap &=
+   ~BIT(MAC_ADDR_FORCED);
+   } else {
+   feature = BIT(MAC_ADDR_FORCED);
+   /* Forced MAC will disable MAC_ADDR */
+   vf_info->bulletin.p_virt->valid_bitmap &=
+   ~BIT(VFPF_BULLETIN_MAC_ADDR);
+   }
+
memcpy(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN);
 
vf_info->bulletin.p_virt->valid_bitmap |= feature;
-   /* Forced MAC will disable MAC_ADDR */
-   vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR);
 
qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 }
 
+static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid)
+{
+   struct qed_vf_info *vf_info;
+   u64 feature;
+
+   vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
+   if (!vf_info) {
+   DP_NOTICE(p_hwfn->cdev, "Can not set MAC, invalid vfid [%d]\n",
+ vfid);
+   return -EINVAL;
+   }
+
+   if (vf_info->b_malicious) {
+   DP_NOTIC

[PATCH 23/39] staging/rtl8192u: simplify procfs code

2018-04-19 Thread Christoph Hellwig

Unwind the registration loop into individual calls.  Switch to use
proc_create_single where applicable.

Signed-off-by: Christoph Hellwig 
---
 drivers/staging/rtl8192u/r8192U_core.c | 67 ++
 1 file changed, 14 insertions(+), 53 deletions(-)

diff --git a/drivers/staging/rtl8192u/r8192U_core.c 
b/drivers/staging/rtl8192u/r8192U_core.c
index d607c59761cf..7a0dbc0fa18e 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -646,64 +646,25 @@ static void rtl8192_proc_module_init(void)
rtl8192_proc = proc_mkdir(RTL819xU_MODULE_NAME, init_net.proc_net);
 }
 
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int rtl8192_proc_open(struct inode *inode, struct file *file)
-{
-   struct net_device *dev = proc_get_parent_data(inode);
-   int (*show)(struct seq_file *, void *) = PDE_DATA(inode);
-
-   return single_open(file, show, dev);
-}
-
-static const struct file_operations rtl8192_proc_fops = {
-   .open   = rtl8192_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
-/*
- * Table of proc files we need to create.
- */
-struct rtl8192_proc_file {
-   char name[12];
-   int (*show)(struct seq_file *, void *);
-};
-
-static const struct rtl8192_proc_file rtl8192_proc_files[] = {
-   { "stats-rx",   &proc_get_stats_rx },
-   { "stats-tx",   &proc_get_stats_tx },
-   { "stats-ap",   &proc_get_stats_ap },
-   { "registers",  &proc_get_registers },
-   { "" }
-};
-
 static void rtl8192_proc_init_one(struct net_device *dev)
 {
-   const struct rtl8192_proc_file *f;
struct proc_dir_entry *dir;
 
-   if (rtl8192_proc) {
-   dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev);
-   if (!dir) {
-   RT_TRACE(COMP_ERR,
-"Unable to initialize /proc/net/rtl8192/%s\n",
-dev->name);
-   return;
-   }
+   if (!rtl8192_proc)
+   return;
 
-   for (f = rtl8192_proc_files; f->name[0]; f++) {
-   if (!proc_create_data(f->name, S_IFREG | S_IRUGO, dir,
- &rtl8192_proc_fops, f->show)) {
-   RT_TRACE(COMP_ERR,
-"Unable to initialize 
/proc/net/rtl8192/%s/%s\n",
-dev->name, f->name);
-   return;
-   }
-   }
-   }
+   dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev);
+   if (!dir)
+   return;
+
+   proc_create_single("stats-rx", S_IFREG | S_IRUGO, dir,
+   proc_get_stats_rx);
+   proc_create_single("stats-tx", S_IFREG | S_IRUGO, dir,
+   proc_get_stats_tx);
+   proc_create_single("stats-ap", S_IFREG | S_IRUGO, dir,
+   proc_get_stats_ap);
+   proc_create_single("registers", S_IFREG | S_IRUGO, dir,
+   proc_get_registers);
 }
 
 static void rtl8192_proc_remove_one(struct net_device *dev)
-- 
2.17.0

[PATCH net-next 2/2] qed* : Add new TLV to request PF to update MAC in bulletin board

2018-04-19 Thread Shahed Shaikh

There may be a need for the VF driver to request the PF to explicitly update
its bulletin with a MAC address.
For example, when the user assigns a MAC address to a VF while the VF is
still down and the PF's bulletin board contains a different MAC address, the
VF's interface gets loaded with the MAC address from the bulletin board when
it is brought up, which is not desirable.

To handle this corner case, we need a new TLV to request PF to update
its bulletin board with suggested MAC.

This request will be honored only for trusted VFs.

Signed-off-by: Shahed Shaikh 
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c   | 19 +
 drivers/net/ethernet/qlogic/qed/qed_sriov.c| 37 ++
 drivers/net/ethernet/qlogic/qed/qed_vf.c   | 29 
 drivers/net/ethernet/qlogic/qed/qed_vf.h   | 21 +++
 drivers/net/ethernet/qlogic/qede/qede_filter.c |  4 +++
 include/linux/qed/qed_eth_if.h |  1 +
 6 files changed, 111 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c 
b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index e874504..8b1b7e8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2850,6 +2850,24 @@ static int qed_fp_cqe_completion(struct qed_dev *dev,
  cqe);
 }
 
+static int qed_req_bulletin_update_mac(struct qed_dev *cdev, u8 *mac)
+{
+   int i, ret;
+
+   if (IS_PF(cdev))
+   return 0;
+
+   for_each_hwfn(cdev, i) {
+   struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+
+   ret = qed_vf_pf_bulletin_update_mac(p_hwfn, mac);
+   if (ret)
+   return ret;
+   }
+
+   return 0;
+}
+
 #ifdef CONFIG_QED_SRIOV
 extern const struct qed_iov_hv_ops qed_iov_ops_pass;
 #endif
@@ -2887,6 +2905,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = {
.ntuple_filter_config = &qed_ntuple_arfs_filter_config,
.configure_arfs_searcher = &qed_configure_arfs_searcher,
.get_coalesce = &qed_get_coalesce,
+   .req_bulletin_update_mac = &qed_req_bulletin_update_mac,
 };
 
 const struct qed_eth_ops *qed_get_eth_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c 
b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 77376fd..f01bf52 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -3820,6 +3820,40 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn,
__qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin);
 }
 
+static int
+qed_iov_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_vf_info *p_vf)
+{
+   struct qed_bulletin_content *p_bulletin = p_vf->bulletin.p_virt;
+   struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx;
+   struct vfpf_bulletin_update_mac_tlv *p_req;
+   u8 status = PFVF_STATUS_SUCCESS;
+   int rc = 0;
+
+   if (!p_vf->p_vf_info.is_trusted_configured) {
+   DP_VERBOSE(p_hwfn,
+  QED_MSG_IOV,
+  "Blocking bulletin update request from untrusted 
VF[%d]\n",
+  p_vf->abs_vf_id);
+   status = PFVF_STATUS_NOT_SUPPORTED;
+   rc = -EINVAL;
+   goto send_status;
+   }
+
+   p_req = &mbx->req_virt->bulletin_update_mac;
+   ether_addr_copy(p_bulletin->mac, p_req->mac);
+   DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+  "Updated bulletin of VF[%d] with requested MAC[%pM]\n",
+  p_vf->abs_vf_id, p_req->mac);
+
+send_status:
+   qed_iov_prepare_resp(p_hwfn, p_ptt, p_vf,
+CHANNEL_TLV_BULLETIN_UPDATE_MAC,
+sizeof(struct pfvf_def_resp_tlv), status);
+   return rc;
+}
+
 static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, int vfid)
 {
@@ -3899,6 +3933,9 @@ static void qed_iov_process_mbx_req(struct qed_hwfn 
*p_hwfn,
case CHANNEL_TLV_COALESCE_READ:
qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf);
break;
+   case CHANNEL_TLV_BULLETIN_UPDATE_MAC:
+   qed_iov_vf_pf_bulletin_update_mac(p_hwfn, p_ptt, p_vf);
+   break;
}
} else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) {
DP_VERBOSE(p_hwfn, QED_MSG_IOV,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c 
b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 91b5e9f..2d7fcd6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1375,6 +1375,35 @@ int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
 }
 
 int
+qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
+ u8 *p_mac)
+{
+   struct qed_vf_iov *p_

Re: [RFC PATCH ghak32 V2 12/13] audit: NETFILTER_PKT: record each container ID associated with a netNS

2018-04-19 Thread Richard Guy Briggs

On 2018-04-18 22:10, Paul Moore wrote:
> On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs  wrote:
> > Add container ID auxiliary record(s) to NETFILTER_PKT event standalone
> > records.  Iterate through all potential container IDs associated with a
> > network namespace.
> >
> > Signed-off-by: Richard Guy Briggs 
> > ---
> >  kernel/audit.c   |  1 +
> >  kernel/auditsc.c |  2 ++
> >  net/netfilter/xt_AUDIT.c | 15 ++-
> >  3 files changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/audit.c b/kernel/audit.c
> > index 08662b4..3c77e47 100644
> > --- a/kernel/audit.c
> > +++ b/kernel/audit.c
> > @@ -2102,6 +2102,7 @@ int audit_log_container_info(struct audit_context 
> > *context,
> > audit_log_end(ab);
> > return 0;
> >  }
> > +EXPORT_SYMBOL(audit_log_container_info);
> >
> >  void audit_log_key(struct audit_buffer *ab, char *key)
> >  {
> > diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> > index 208da962..af68d01 100644
> > --- a/kernel/auditsc.c
> > +++ b/kernel/auditsc.c
> > @@ -975,6 +975,7 @@ struct audit_context *audit_alloc_local(void)
> > context->in_syscall = 1;
> > return context;
> >  }
> > +EXPORT_SYMBOL(audit_alloc_local);
> >
> >  inline void audit_free_context(struct audit_context *context)
> >  {
> > @@ -989,6 +990,7 @@ inline void audit_free_context(struct audit_context 
> > *context)
> > audit_proctitle_free(context);
> > kfree(context);
> >  }
> > +EXPORT_SYMBOL(audit_free_context);
> >
> >  static int audit_log_pid_context(struct audit_context *context, pid_t pid,
> >  kuid_t auid, kuid_t uid, unsigned int 
> > sessionid,
> > diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
> > index c502419..edaa456 100644
> > --- a/net/netfilter/xt_AUDIT.c
> > +++ b/net/netfilter/xt_AUDIT.c
> > @@ -71,10 +71,14 @@ static bool audit_ip6(struct audit_buffer *ab, struct 
> > sk_buff *skb)
> >  {
> > struct audit_buffer *ab;
> > int fam = -1;
> > +   struct audit_context *context = audit_alloc_local();
> > +   struct audit_containerid *cont;
> > +   int i = 0;
> > +   struct net *net;
> >
> > if (audit_enabled == 0)
> > goto errout;
> 
> Do I need to say it?  I probably should ... the allocation should
> happen after the audit_enabled check.

Already fixed in V3 in my tree a couple of weeks ago...
More timely review please?

> > -   ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
> > +   ab = audit_log_start(context, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
> > if (ab == NULL)
> > goto errout;
> >
> > @@ -104,7 +108,16 @@ static bool audit_ip6(struct audit_buffer *ab, struct 
> > sk_buff *skb)
> >
> > audit_log_end(ab);
> >
> > +   net = sock_net(NETLINK_CB(skb).sk);
> > +   list_for_each_entry(cont, &net->audit_containerid, list) {
> > +   char buf[14];
> > +
> > +   sprintf(buf, "net%u", i++);
> > +   audit_log_container_info(context, buf, cont->id);
> > +   }
> 
> It seems like this could (should?) be hidden inside an audit function,
> e.g. audit_log_net_containers() or something like that.

Perhaps...  It was open-coded since at this point there are no other
users.  That'll make this tidier though.

> >  errout:
> > +   audit_free_context(context);
> > return XT_CONTINUE;
> >  }
> 
> -- 
> paul moore
> www.paul-moore.com

- RGB

--
Richard Guy Briggs 
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

[PATCH 24/39] resource: switch to proc_create_seq_data

2018-04-19 Thread Christoph Hellwig

And use the root resource directly from the proc private data.

Signed-off-by: Christoph Hellwig 
---
 kernel/resource.c | 43 +--
 1 file changed, 5 insertions(+), 38 deletions(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index 2af6c03858b9..b589dda910b3 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -87,7 +87,7 @@ enum { MAX_IORES_LEVEL = 5 };
 static void *r_start(struct seq_file *m, loff_t *pos)
__acquires(resource_lock)
 {
-   struct resource *p = m->private;
+   struct resource *p = PDE_DATA(file_inode(m->file));
loff_t l = 0;
read_lock(&resource_lock);
for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
@@ -103,7 +103,7 @@ static void r_stop(struct seq_file *m, void *v)
 
 static int r_show(struct seq_file *m, void *v)
 {
-   struct resource *root = m->private;
+   struct resource *root = PDE_DATA(file_inode(m->file));
struct resource *r = v, *p;
unsigned long long start, end;
int width = root->end < 0x1 ? 4 : 8;
@@ -135,44 +135,11 @@ static const struct seq_operations resource_op = {
.show   = r_show,
 };
 
-static int ioports_open(struct inode *inode, struct file *file)
-{
-   int res = seq_open(file, &resource_op);
-   if (!res) {
-   struct seq_file *m = file->private_data;
-   m->private = &ioport_resource;
-   }
-   return res;
-}
-
-static int iomem_open(struct inode *inode, struct file *file)
-{
-   int res = seq_open(file, &resource_op);
-   if (!res) {
-   struct seq_file *m = file->private_data;
-   m->private = &iomem_resource;
-   }
-   return res;
-}
-
-static const struct file_operations proc_ioports_operations = {
-   .open   = ioports_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= seq_release,
-};
-
-static const struct file_operations proc_iomem_operations = {
-   .open   = iomem_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= seq_release,
-};
-
 static int __init ioresources_init(void)
 {
-   proc_create("ioports", 0, NULL, &proc_ioports_operations);
-   proc_create("iomem", 0, NULL, &proc_iomem_operations);
+   proc_create_seq_data("ioports", 0, NULL, &resource_op,
+   &ioport_resource);
+   proc_create_seq_data("iomem", 0, NULL, &resource_op, &iomem_resource);
return 0;
 }
 __initcall(ioresources_init);
-- 
2.17.0

[PATCH net-next 0/2] qed* : Use trust mode to override forced MAC

2018-04-19 Thread Shahed Shaikh

Hi Dave,

This patchset adds support for overriding the forced MAC (the MAC set by the
PF for a VF) when trust mode is enabled using
#ip link set dev <pf> vf <vf_id> trust on

The first patch makes the actual change: it uses .ndo_set_vf_trust to override
the forced MAC and allows the user to change the VF's MAC from the VF interface
itself.

Second patch takes care of a corner case, where MAC change from VF won't
take effect when VF interface is down, by introducing a new TLV
(a way to send message from VF to PF) to give a hint to PF to update
its bulletin board.

Please apply this series to net-next.

Thanks,
Shahed

Shahed Shaikh (2):
  qed* : use trust mode to allow VF to override forced MAC
  qed* : Add new TLV to request PF to update MAC in bulletin board

 drivers/net/ethernet/qlogic/qed/qed_l2.c   |  19 ++
 drivers/net/ethernet/qlogic/qed/qed_sriov.c| 247 +++--
 drivers/net/ethernet/qlogic/qed/qed_vf.c   |  29 +++
 drivers/net/ethernet/qlogic/qed/qed_vf.h   |  21 +++
 drivers/net/ethernet/qlogic/qede/qede_filter.c |   7 +-
 include/linux/qed/qed_eth_if.h |   1 +
 6 files changed, 306 insertions(+), 18 deletions(-)

-- 
2.7.4

[bisected] Stack overflow after fs: "switch the IO-triggering parts of umount to fs_pin" (was net namespaces kernel stack overflow)

2018-04-19 Thread Kirill Tkhai

Hi, Al,

commit 87b95ce0964c016ede92763be9c164e49f1019e9 is the first after which the 
below test crashes the kernel:

Author: Al Viro 
Date:   Sat Jan 10 19:01:08 2015 -0500

switch the IO-triggering parts of umount to fs_pin

Signed-off-by: Al Viro 

$modprobe dummy

$while true
 do
 mkdir /var/run/netns
 touch /var/run/netns/init_net
 mount --bind /proc/1/ns/net /var/run/netns/init_net

 ip netns add foo
 ip netns exec foo ip link add dummy0 type dummy
 ip netns delete foo
done

[   22.058349] ip (3249) used greatest stack depth: 8 bytes left
[   22.182195] BUG: unable to handle kernel paging request at 00035bb1f080
[   22.183065] IP: [] kick_process+0x34/0x80
[   22.183065] PGD 0 
[   22.183065] Thread overran stack, or stack corrupted
[   22.183065] Oops:  [#1] PREEMPT SMP 
[   22.183065] CPU: 1 PID: 3255 Comm: ip Not tainted 3.19.0-rc5+ #111
[   22.183065] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.11.1-1 04/01/2014
[   22.183065] task: 88007c475100 ti: 88007b3cc000 task.ti: 
88007b3cc000
[   22.183065] RIP: 0010:[]  [] 
kick_process+0x34/0x80
[   22.183065] RSP: 0018:88007b3cfcf8  EFLAGS: 00010293
[   22.183065] RAX: 00012900 RBX: 88007c475100 RCX: 88007b20e7b8
[   22.183065] RDX: 7b3cc028 RSI: 819b05f8 RDI: 819cb999
[   22.183065] RBP: 88007b3cfd08 R08: 81cbf688 R09: 88007d3d0810
[   22.183065] R10: 88007fc933c8 R11:  R12: 7b3cc028
[   22.183065] R13: 88007c475100 R14:  R15: 7fff7793a448
[   22.183065] FS:  7fc987546700() GS:88007fc8() 
knlGS:
[   22.183065] CS:  0010 DS:  ES:  CR0: 8005003b
[   22.183065] CR2: 00035bb1f080 CR3: 01c11000 CR4: 06e0
[   22.183065] Stack:
[   22.183065]  88007c3b67b8 88007b3cfd98 88007b3cfd18 
81066b05
[   22.183065]  88007b3cfd38 81176f4c 88007b3cfd48 
88007c3b68a0
[   22.183065]  88007b3cfd48 811f 88007b3cfd68 
81177a49
[   22.183065] Call Trace:
[   22.183065]  [] task_work_add+0x45/0x60
[   22.183065]  [] mntput_no_expire+0xdc/0x150
[   22.183065]  [] mntput+0x1f/0x30
[   22.183065]  [] drop_mountpoint+0x29/0x30
[   22.183065]  [] pin_kill+0x66/0xf0
[   22.183065]  [] ? __wake_up_common+0x90/0x90
[   22.183065]  [] group_pin_kill+0x19/0x40
[   22.183065]  [] namespace_unlock+0x58/0x60
[   22.183065]  [] drop_collected_mounts+0x4e/0x60
[   22.183065]  [] put_mnt_ns+0x2d/0x50
[   22.183065]  [] free_nsproxy+0x1a/0x80
[   22.183065]  [] switch_task_namespaces+0x58/0x70
[   22.183065]  [] exit_task_namespaces+0xb/0x10
[   22.183065]  [] do_exit+0x2c7/0xc00
[   22.183065]  [] do_group_exit+0x3a/0xa0
[   22.183065]  [] SyS_exit_group+0xf/0x10
[   22.183065]  [] system_call_fastpath+0x12/0x17

Kirill

On 19.04.2018 01:08, Kirill Tkhai wrote:
> Hi, Alexander!
> 
> On 18.04.2018 22:45, Alexander Aring wrote:
>> I can currently crash my net/master kernel by executing the following script:
>>
>> --- snip
>>
>> modprobe dummy
>>
>> #mkdir /var/run/netns
>> #touch /var/run/netns/init_net
>> #mount --bind /proc/1/ns/net /var/run/netns/init_net
>>
>> while true
>> do
>> mkdir /var/run/netns
>> touch /var/run/netns/init_net
>> mount --bind /proc/1/ns/net /var/run/netns/init_net
>>
>> ip netns add foo
>> ip netns exec foo ip link add dummy0 type dummy
>> ip netns delete foo
>> done
> 
> Fast answer is the best, so I tried your test on my not-for-work computer.
> There is old kernel without asynchronous pernet operations:
> 
> $uname -a
> Linux localhost.localdomain 4.15.0-2-amd64 #1 SMP Debian 4.15.11-1 
> (2018-03-20) x86_64 GNU/Linux
> 
> After approximately 15 seconds of your test execution it died :(
> (Hopefully, I executed it in "init 1" with all partitions RO as usual).
> 
> There is no serial console, so I can't say that the first stack is exactly
> the same as you see. But it crashed. So, it seems, the problem has
> existed for a long time.
> 
> Have you tried to reproduce it in older kernels or to bisect the problem 
> commit?
> Or maybe it doesn't reproduce on old kernels in your environment?
> 
>> --- snap
>>
>> After max ~1 minute the kernel will crash.
>> Doing my hack of saving init_net outside the loop it will run fine...
>> So the mount bind is necessary.
>>
>> The last message which I see is:
>>
>> BUG: stack guard page was hit at f0751759 (stack is
>> 69363195..73ddc474)
>> kernel stack overflow (double-fault):  [#1] SMP PTI
>> Modules linked in:
>> CPU: 0 PID: 13917 Comm: ip Not tainted 4.16.0-11878-gef9d066f6808 #32
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 
>> 04/01/2014
>> RIP: 0010:validate_chain.isra.23+0x44/0xc40
>> RSP: 0018:c92cbff8 EFLAGS: 00010002
>> RAX: 0004 RBX: 0e58b88e1d4d15da RCX: 0e58b88e1d4d15da
>> RDX:

[PATCH 26/39] rtc/proc: switch to proc_create_single_data

2018-04-19 Thread Christoph Hellwig

And stop trying to get a reference on the submodule; procfs code deals
with release after an unloaded module and thus a removed proc entry.

Signed-off-by: Christoph Hellwig 
---
 drivers/rtc/rtc-proc.c | 33 ++---
 1 file changed, 2 insertions(+), 31 deletions(-)

diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
index 31e7e23cc5be..a9dd9218fae2 100644
--- a/drivers/rtc/rtc-proc.c
+++ b/drivers/rtc/rtc-proc.c
@@ -107,40 +107,11 @@ static int rtc_proc_show(struct seq_file *seq, void 
*offset)
return 0;
 }
 
-static int rtc_proc_open(struct inode *inode, struct file *file)
-{
-   int ret;
-   struct rtc_device *rtc = PDE_DATA(inode);
-
-   if (!try_module_get(rtc->owner))
-   return -ENODEV;
-
-   ret = single_open(file, rtc_proc_show, rtc);
-   if (ret)
-   module_put(rtc->owner);
-   return ret;
-}
-
-static int rtc_proc_release(struct inode *inode, struct file *file)
-{
-   int res = single_release(inode, file);
-   struct rtc_device *rtc = PDE_DATA(inode);
-
-   module_put(rtc->owner);
-   return res;
-}
-
-static const struct file_operations rtc_proc_fops = {
-   .open   = rtc_proc_open,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= rtc_proc_release,
-};
-
 void rtc_proc_add_device(struct rtc_device *rtc)
 {
if (is_rtc_hctosys(rtc))
-   proc_create_data("driver/rtc", 0, NULL, &rtc_proc_fops, rtc);
+   proc_create_single_data("driver/rtc", 0, NULL, rtc_proc_show,
+   rtc);
 }
 
 void rtc_proc_del_device(struct rtc_device *rtc)
-- 
2.17.0

1 2 3 4 >

1 - 100 of 308 matches

Mail list logo