date:20170627

[PATCH iproute2 V1 2/6] rdma: Add dev object

2017-06-27 Thread Leon Romanovsky

From: Leon Romanovsky 

Device (dev) object represents struct ib_device to user space.

The supported commands are show, list and help, but it doesn't print
anything except the device name at this stage. Follow-up patches will
fill this object with subcommands.

Print all devices:
 # rdma dev
1: mlx5_0:
2: mlx5_1:
3: mlx5_2:

Print specific device:
 # rdma dev show mlx5_1
2: mlx5_1:

Signed-off-by: Leon Romanovsky 
---
 rdma/Makefile |  2 +-
 rdma/dev.c| 55 +++
 rdma/rdma.c   |  3 ++-
 rdma/rdma.h   |  6 ++
 rdma/utils.c  | 39 ++-
 5 files changed, 98 insertions(+), 7 deletions(-)
 create mode 100644 rdma/dev.c

diff --git a/rdma/Makefile b/rdma/Makefile
index 64da2142..123d7ac5 100644
--- a/rdma/Makefile
+++ b/rdma/Makefile
@@ -2,7 +2,7 @@ include ../Config
 
 ifeq ($(HAVE_MNL),y)
 
-RDMA_OBJ = rdma.o utils.o
+RDMA_OBJ = rdma.o utils.o dev.o
 
 TARGETS=rdma
 CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
diff --git a/rdma/dev.c b/rdma/dev.c
new file mode 100644
index 00000000..d4809d63
--- /dev/null
+++ b/rdma/dev.c
@@ -0,0 +1,55 @@
+/*
+ * dev.c   RDMA tool
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Authors: Leon Romanovsky 
+ */
+
+#include "rdma.h"
+
+static int dev_help(struct rdma *rd)
+{
+   pr_out("Usage: %s dev show [DEV]\n", rd->filename);
+   return 0;
+}
+
+static void dev_one_show(const struct dev_map *dev_map)
+{
+   pr_out("%u: %s:\n", dev_map->idx, dev_map->dev_name);
+}
+
+static int dev_show(struct rdma *rd)
+{
+   struct dev_map *dev_map;
+
+   if (rd_no_arg(rd)) {
+   list_for_each_entry(dev_map, &rd->dev_map_list, list)
+   dev_one_show(dev_map);
+   }
+   else {
+   dev_map = dev_map_lookup(rd, false);
+   if (!dev_map) {
+   pr_err("Wrong device name\n");
+   return -ENOENT;
+   }
+   dev_one_show(dev_map);
+   }
+   return 0;
+}
+
+int cmd_dev(struct rdma *rd)
+{
+   const struct rdma_cmd cmds[] = {
+   { NULL, dev_show },
+   { "show",   dev_show },
+   { "list",   dev_show },
+   { "help",   dev_help },
+   { 0 }
+   };
+
+   return rdma_exec_cmd(rd, cmds, "dev command");
+}
diff --git a/rdma/rdma.c b/rdma/rdma.c
index 9c754da3..f904532c 100644
--- a/rdma/rdma.c
+++ b/rdma/rdma.c
@@ -18,7 +18,7 @@
 static void help(char *name)
 {
pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n"
-  "where  OBJECT := { help }\n"
+  "where  OBJECT := { dev | help }\n"
   "   OPTIONS := { -V[ersion] }\n", name);
 }
 
@@ -33,6 +33,7 @@ static int rd_cmd(struct rdma *rd)
const struct rdma_cmd cmds[] = {
{ NULL, cmd_help },
{ "help",   cmd_help },
+   { "dev",cmd_dev },
{ 0 }
};
 
diff --git a/rdma/rdma.h b/rdma/rdma.h
index 9841aebf..f5e104ec 100644
--- a/rdma/rdma.h
+++ b/rdma/rdma.h
@@ -58,12 +58,18 @@ bool rd_no_arg(struct rdma *rd);
 bool rd_argv_match(struct rdma *rd, const char *pattern);
 void rd_arg_inc(struct rdma *rd);
 
+/*
+ * Commands interface
+ */
+int cmd_dev(struct rdma *rd);
 int rdma_exec_cmd(struct rdma *rd, const struct rdma_cmd *c, const char *str);
 
 /*
  * Device manipulation
  */
 void rdma_free_devmap(struct rdma *rd);
+struct dev_map *dev_map_lookup(struct rdma *rd, bool allow_port_index);
+struct dev_map *_dev_map_lookup(struct rdma *rd, const char *dev_name);
 
 /*
  * Netlink
diff --git a/rdma/utils.c b/rdma/utils.c
index 96278a4c..4d29eced 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -128,8 +128,10 @@ static int port_map_alloc(struct dev_map *dev_map, 
uint32_t num_ports)
 }
 
 static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
+   [RDMA_NLDEV_ATTR_DEV_INDEX] = MNL_TYPE_U32,
[RDMA_NLDEV_ATTR_DEV_NAME] = MNL_TYPE_NUL_STRING,
[RDMA_NLDEV_ATTR_PORT_INDEX] = MNL_TYPE_U32,
+   [RDMA_NLDEV_ATTR_CAP_FLAGS] = MNL_TYPE_U64,
 };
 
 int rd_attr_cb(const struct nlattr *attr, void *data)
@@ -156,10 +158,9 @@ int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data)
struct rdma *rd = data;
const char *dev_name;
uint32_t num_ports;
-   static int i = 1;
 
mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
-   if (!tb[RDMA_NLDEV_ATTR_DEV_NAME])
+   if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME])
return MNL_CB_ERROR;
if

[PATCH iproute2 V1 1/6] rdma: Add basic infrastructure for RDMA tool

2017-06-27 Thread Leon Romanovsky

From: Leon Romanovsky 

RDMA devices are cross-functional devices from one side,
but very tailored for the specific markets from another.

Such diversity caused RDMA-related configuration to be spread
across various tools, e.g. devlink, ip, ethtool, IB-specific and
vendor-specific solutions.

This patch adds ability to fill device and port information
by reading RDMA netlink.

Signed-off-by: Leon Romanovsky 
---
 Makefile|   2 +-
 rdma/.gitignore |   1 +
 rdma/Makefile   |  22 +
 rdma/rdma.c | 110 +++
 rdma/rdma.h |  76 
 rdma/utils.c| 270 
 6 files changed, 480 insertions(+), 1 deletion(-)
 create mode 100644 rdma/.gitignore
 create mode 100644 rdma/Makefile
 create mode 100644 rdma/rdma.c
 create mode 100644 rdma/rdma.h
 create mode 100644 rdma/utils.c

diff --git a/Makefile b/Makefile
index 18de7dcb..c255063b 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ WFLAGS += -Wmissing-declarations -Wold-style-definition 
-Wformat=2
 CFLAGS := $(WFLAGS) $(CCOPTS) -I../include $(DEFINES) $(CFLAGS)
 YACCFLAGS = -d -t -v
 
-SUBDIRS=lib ip tc bridge misc netem genl tipc devlink man
+SUBDIRS=lib ip tc bridge misc netem genl tipc devlink rdma man
 
 LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
 LDLIBS += $(LIBNETLINK)
diff --git a/rdma/.gitignore b/rdma/.gitignore
new file mode 100644
index 00000000..51fb172b
--- /dev/null
+++ b/rdma/.gitignore
@@ -0,0 +1 @@
+rdma
diff --git a/rdma/Makefile b/rdma/Makefile
new file mode 100644
index 00000000..64da2142
--- /dev/null
+++ b/rdma/Makefile
@@ -0,0 +1,22 @@
+include ../Config
+
+ifeq ($(HAVE_MNL),y)
+
+RDMA_OBJ = rdma.o utils.o
+
+TARGETS=rdma
+CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
+LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
+
+endif
+
+all:   $(TARGETS) $(LIBS)
+
+rdma:  $(RDMA_OBJ) $(LIBS)
+   $(QUIET_LINK)$(CC) $^ $(LDFLAGS) $(LDLIBS) -o $@
+
+install: all
+   install -m 0755 $(TARGETS) $(DESTDIR)$(SBINDIR)
+
+clean:
+   rm -f $(RDMA_OBJ) $(TARGETS)
diff --git a/rdma/rdma.c b/rdma/rdma.c
new file mode 100644
index 00000000..9c754da3
--- /dev/null
+++ b/rdma/rdma.c
@@ -0,0 +1,110 @@
+/*
+ * rdma.c  RDMA tool
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Authors: Leon Romanovsky 
+ */
+
+#include 
+#include 
+
+#include "rdma.h"
+#include "SNAPSHOT.h"
+
+static void help(char *name)
+{
+   pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n"
+  "where  OBJECT := { help }\n"
+  "   OPTIONS := { -V[ersion] }\n", name);
+}
+
+static int cmd_help(struct rdma *rd)
+{
+   help(rd->filename);
+   return 0;
+}
+
+static int rd_cmd(struct rdma *rd)
+{
+   const struct rdma_cmd cmds[] = {
+   { NULL, cmd_help },
+   { "help",   cmd_help },
+   { 0 }
+   };
+
+   return rdma_exec_cmd(rd, cmds, "object");
+}
+
+static int rd_init(struct rdma *rd, int argc, char **argv, char *filename)
+{
+   uint32_t seq;
+   int ret;
+
+   rd->filename = filename;
+   rd->argc = argc;
+   rd->argv = argv;
+   INIT_LIST_HEAD(&rd->dev_map_list);
+   rd->buff = malloc(MNL_SOCKET_BUFFER_SIZE);
+   if (!rd->buff)
+   return -ENOMEM;
+
+   rdma_prepare_msg(rd, RDMA_NLDEV_CMD_GET, &seq, (NLM_F_REQUEST | 
NLM_F_ACK | NLM_F_DUMP));
+   if ((ret = rdma_send_msg(rd)))
+   return ret;
+
+   return rdma_recv_msg(rd, rd_dev_init_cb, rd, seq);
+}
+
+static void rd_free(struct rdma *rd)
+{
+   free(rd->buff);
+   rdma_free_devmap(rd);
+}
+int main(int argc, char **argv)
+{
+   char *filename;
+   static const struct option long_options[] = {
+   { "version",no_argument,NULL, 'V' },
+   { "help",   no_argument,NULL, 'h' },
+   { NULL, 0, NULL, 0 }
+   };
+   struct rdma rd;
+   int opt;
+   int err;
+
+   filename = basename(argv[0]);
+
+   while ((opt = getopt_long(argc, argv, "Vh",
+ long_options, NULL)) >= 0) {
+
+   switch (opt) {
+   case 'V':
+   printf("%s utility, iproute2-ss%s\n", filename, 
SNAPSHOT);
+   return EXIT_SUCCESS;
+   case 'h':
+   help(filename);
+   return EXIT_SUCCESS;
+   default:
+   pr_err("Unknown option.\n");
+   help(filename);
+   return EXIT_FAILURE;
+   }
+   }
+
+   argc -=

[PATCH iproute2 V1 6/6] rdma: Add initial manual for the tool

2017-06-27 Thread Leon Romanovsky

From: Leon Romanovsky 

Signed-off-by: Leon Romanovsky 
---
 man/man8/Makefile |  3 +-
 man/man8/rdma.8   | 82 +++
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 man/man8/rdma.8

diff --git a/man/man8/Makefile b/man/man8/Makefile
index f3318644..81979a07 100644
--- a/man/man8/Makefile
+++ b/man/man8/Makefile
@@ -19,7 +19,8 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 
rtmon.8 rtpr.8 ss.
tc-simple.8 tc-skbedit.8 tc-vlan.8 tc-xt.8 tc-skbmod.8 tc-ife.8 \
tc-tunnel_key.8 tc-sample.8 \
devlink.8 devlink-dev.8 devlink-monitor.8 devlink-port.8 devlink-sb.8 \
-   ifstat.8
+   ifstat.8 \
+   rdma.8
 
 all: $(TARGETS)
 
diff --git a/man/man8/rdma.8 b/man/man8/rdma.8
new file mode 100644
index ..7578c15e
--- /dev/null
+++ b/man/man8/rdma.8
@@ -0,0 +1,82 @@
+.TH RDMA 8 "28 Mar 2017" "iproute2" "Linux"
+.SH NAME
+rdma \- RDMA tool
+.SH SYNOPSIS
+.sp
+.ad l
+.in +8
+.ti -8
+.B rdma
+.RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | "
+.BR help " }"
+.sp
+
+.ti -8
+.IR OBJECT " := { "
+.BR dev " | " link " }"
+.sp
+
+.ti -8
+.IR OPTIONS " := { "
+\fB\-V\fR[\fIersion\fR] }
+
+.SH OPTIONS
+
+.TP
+.BR "\-V" , " -Version"
+Print the version of the
+.B rdma
+tool and exit.
+
+.SS
+.I OBJECT
+
+.TP
+.B dev
+- RDMA device.
+
+.TP
+.B link
+- RDMA port related.
+
+.PP
+The names of all objects may be written in full or
+abbreviated form, for example
+.B stats
+can be abbreviated as
+.B stat
+or just
+.B s.
+
+.SS
+.I COMMAND
+
+Specifies the action to perform on the object.
+The set of possible actions depends on the object type.
+As a rule, it is possible to
+.B show
+(or
+.B list
+) objects, but some objects do not allow all of these operations
+or have some additional commands. The
+.B help
+command is available for all objects. It prints
+out a list of available commands and argument syntax conventions.
+.sp
+If no command is given, some default command is assumed.
+Usually it is
+.B list
+or, if the objects of this class cannot be listed,
+.BR "help" .
+
+.SH EXIT STATUS
+Exit status is 0 if command was successful or a positive integer upon failure.
+
+.SH REPORTING BUGS
+Report any bugs to the Linux RDMA mailing list
+.B <linux-rdma@vger.kernel.org>
+where the development and maintenance is primarily done.
+You do not have to be subscribed to the list to send a message there.
+
+.SH AUTHOR
+Leon Romanovsky 
-- 
2.13.1

[PATCH iproute2 V1 5/6] rdma: Add FW version to the device output

2017-06-27 Thread Leon Romanovsky

From: Leon Romanovsky 

$ rdma dev show mlx5_4
5: mlx5_4: fw 2.8.
caps: 

Signed-off-by: Leon Romanovsky 
---
 rdma/dev.c   | 9 -
 rdma/rdma.h  | 1 +
 rdma/utils.c | 4 
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/rdma/dev.c b/rdma/dev.c
index 76f4af88..45dc0b3f 100644
--- a/rdma/dev.c
+++ b/rdma/dev.c
@@ -83,7 +83,14 @@ static int dev_no_args(struct rdma *rd)
 {
struct dev_map *dev_map = rd->dev_map_curr;
 
-   pr_out("%u: %s: \n", dev_map->idx, dev_map->dev_name);
+   pr_out("%u: %s: ", dev_map->idx, dev_map->dev_name);
+   if (strlen(dev_map->fw_version) < 1)
+   /*
+* if no FW, the return string from RDMA netlink is "\0"
+*/
+   pr_out("fw NONE\n");
+   else
+   pr_out("fw %s\n", dev_map->fw_version);
return dev_print_caps(rd);
 }
 
diff --git a/rdma/rdma.h b/rdma/rdma.h
index 553a4fc2..f221575e 100644
--- a/rdma/rdma.h
+++ b/rdma/rdma.h
@@ -36,6 +36,7 @@ struct dev_map {
struct list_head port_map_list;
uint32_t idx;
uint64_t caps;
+   char *fw_version;
 };
 
 struct rdma {
diff --git a/rdma/utils.c b/rdma/utils.c
index 68ae3d3e..47a6ab11 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -104,6 +104,7 @@ static void dev_map_free(struct dev_map *dev_map)
port_map_free(port_map);
}
 
+   free(dev_map->fw_version);
free(dev_map->dev_name);
free(dev_map);
 }
@@ -141,6 +142,7 @@ static const enum mnl_attr_data_type 
nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DEV_NAME] = MNL_TYPE_NUL_STRING,
[RDMA_NLDEV_ATTR_PORT_INDEX] = MNL_TYPE_U32,
[RDMA_NLDEV_ATTR_CAP_FLAGS] = MNL_TYPE_U64,
+   [RDMA_NLDEV_ATTR_FW_VERSION] = MNL_TYPE_NUL_STRING,
 };
 
 int rd_attr_cb(const struct nlattr *attr, void *data)
@@ -190,6 +192,8 @@ int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data)
dev_map->idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
dev_map->caps = mnl_attr_get_u64(tb[RDMA_NLDEV_ATTR_CAP_FLAGS]);
 
+   dev_map->fw_version = 
strdup(mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_FW_VERSION]));
+
return MNL_CB_OK;
 }
 
-- 
2.13.1

[PATCH iproute2 V1 4/6] rdma: Add link option and parsing

2017-06-27 Thread Leon Romanovsky

From: Leon Romanovsky 

Add link object interface together with port capability parsing command

$ rdma link
1/1: mlx5_0/1:
caps: 
2/1: mlx5_1/1:
caps: 
3/1: mlx5_2/1:
caps: 
4/1: mlx5_3/1:
caps: 
5/1: mlx5_4/1:
caps: 

$ rdma link show mlx5_4
5/1: mlx5_4/1:
caps: 

Signed-off-by: Leon Romanovsky 
---
 rdma/Makefile |   2 +-
 rdma/link.c   | 202 ++
 rdma/rdma.c   |   1 +
 rdma/rdma.h   |   4 ++
 rdma/utils.c  |   9 +++
 5 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 rdma/link.c

diff --git a/rdma/Makefile b/rdma/Makefile
index 123d7ac5..1a9e4b1a 100644
--- a/rdma/Makefile
+++ b/rdma/Makefile
@@ -2,7 +2,7 @@ include ../Config
 
 ifeq ($(HAVE_MNL),y)
 
-RDMA_OBJ = rdma.o utils.o dev.o
+RDMA_OBJ = rdma.o utils.o dev.o link.o
 
 TARGETS=rdma
 CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
diff --git a/rdma/link.c b/rdma/link.c
new file mode 100644
index 00000000..1ffc83d4
--- /dev/null
+++ b/rdma/link.c
@@ -0,0 +1,202 @@
+/*
+ * link.c  RDMA tool
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Authors: Leon Romanovsky 
+ */
+
+#include "rdma.h"
+
+static int link_help(struct rdma *rd)
+{
+   pr_out("Usage: %s link show [DEV/PORT_INDEX]\n", rd->filename);
+   return 0;
+}
+
+static const char *link_caps[64] = {
+   "UNKNOWN",
+   "SM",
+   "NOTICE",
+   "TRAP",
+   "OPT_IPD",
+   "AUTO_MIGR",
+   "SL_MAP",
+   "MKEY_NVRAM",
+   "PKEY_NVRAM",
+   "LED_INFO",
+   "SM_DISABLED",
+   "SYS_IMAGE_GUID",
+   "PKEY_SW_EXT_PORT_TRAP",
+   "UNKNOWN",
+   "EXTENDED_SPEEDS",
+   "UNKNOWN",
+   "CM",
+   "SNMP_TUNNEL",
+   "REINIT",
+   "DEVICE_MGMT",
+   "VENDOR_CLASS",
+   "DR_NOTICE",
+   "CAP_MASK_NOTICE",
+   "BOOT_MGMT",
+   "LINK_LATENCY",
+   "CLIENT_REG",
+   "IP_BASED_GIDS",
+};
+
+static int link_print_caps(struct rdma *rd)
+{
+   uint64_t caps = rd->port_map_curr->caps;
+   bool found = false;
+   uint32_t idx;
+
+   pr_out("caps: <");
+   for (idx = 0; idx < 64; idx++) {
+   if (caps & 0x1) {
+   pr_out("%s", link_caps[idx]?link_caps[idx]:"UNKNONW");
+   if (caps >> 0x1)
+   pr_out(", ");
+   found = true;
+   }
+   caps >>= 0x1;
+   }
+   if(!found)
+   pr_out("NONE");
+
+   pr_out(">\n");
+   return 0;
+}
+
+static int link_no_args(struct rdma *rd)
+{
+   struct port_map *port_map = rd->port_map_curr;
+   struct dev_map *dev_map = rd->dev_map_curr;
+
+   pr_out("%u/%u: %s/%u: \n", dev_map->idx, port_map->idx, 
dev_map->dev_name, port_map->idx);
+   return link_print_caps(rd);
+}
+
+static int link_one_show(struct rdma *rd)
+{
+   const struct rdma_cmd cmds[] = {
+   { NULL, link_no_args},
+   { 0 }
+   };
+
+   return rdma_exec_cmd(rd, cmds, "parameter");
+
+}
+
+static int port_init_cb(const struct nlmsghdr *nlh, void *data)
+{
+   struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {};
+   struct port_map *port_map;
+   struct dev_map *dev_map;
+   struct rdma *rd = data;
+   uint32_t port_idx;
+   uint32_t caps;
+
+   mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
+   if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME])
+   return MNL_CB_ERROR;
+   if (!tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+   pr_err("This tool doesn't support switches yet\n");
+   return MNL_CB_ERROR;
+   }
+
+   dev_map = rd->dev_map_curr;
+
+   port_idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+   caps = mnl_attr_get_u64(tb[RDMA_NLDEV_ATTR_CAP_FLAGS]);
+
+   list_for_each_entry(port_map, &dev_map->port_map_list, list) {
+   if (port_map->idx != port_idx)
+   continue;
+
+   port_map->caps = caps;
+   }
+
+   return MNL_CB_OK;
+}
+
+
+static int fill_port_map(struct rdma *rd)
+{
+   uint32_t seq;
+   int ret;
+
+   rdma_prepare_msg(rd, RDMA_NLDEV_CMD_PORT_GET, &seq, (NLM_F_REQUEST | 
NLM_F_ACK | NLM_F_DUMP));
+   mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, 
rd->dev_map_curr->idx);
+   if ((ret = rdma_send_msg(rd)))
+   return ret;
+
+   return rdma_recv_msg(rd, port_init_cb, rd, seq);
+}
+
+static int link_show(struct rdma *rd)
+{
+   struct port_map *port_map;
+   struct dev_map *dev_map;
+   int ret = 0;
+
+   if (rd_no_arg(rd)) {
+

Re: [PATCH 05/11] net: stmmac: dwmac-rk: Add internal phy support

2017-06-27 Thread David.Wu


Hi Heiko,

在 2017/6/24 1:19, Heiko Stuebner 写道:

Hi David,

Am Freitag, 23. Juni 2017, 12:59:07 CEST schrieb David Wu:

To make the internal phy work, we need to configure the phy_clock,
the phy cru_reset and the related registers.

Change-Id: I6971c0a769754b824b1b908b56080cbaf7867d13


please remove all Change-Ids from patches before sending upstream.
There were more affected patches in this series.


Signed-off-by: David Wu 
---
  .../devicetree/bindings/net/rockchip-dwmac.txt |  3 +
  drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 82 ++
  2 files changed, 85 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt 
b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index 8f42755..0514f69 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -22,6 +22,7 @@ Required properties:
   <&cru SCLK_MACREF_OUT> clock gate for RMII reference clock output
   <&cru ACLK_GMAC>: AXI clock gate for GMAC
   <&cru PCLK_GMAC>: APB clock gate for GMAC
+  <&cru MAC_PHY>: clock for internal macphy


that clock should not be listed as always "Required" like it is here.
Make it some sort of extra paragraph marking it as required when using
an internal phy.



Okay, move it to the option.


   - clock-names: One name for each entry in the clocks property.
   - phy-mode: See ethernet.txt file in the same directory.
   - pinctrl-names: Names corresponding to the numbered pinctrl states.
@@ -35,6 +36,8 @@ Required properties:
   - assigned-clocks: main clock, should be <&cru SCLK_MAC>;
   - assigned-clock-parents = parent of main clock.
 can be <&ext_gmac> or <&cru SCLK_MAC_PLL>.
+ - phy-type: For internal phy, it must be "internal"; For external phy, no need
+   to configure this.
  
  Optional properties:

   - tx_delay: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as 
default.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index a8e8fd5..c1a1413 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -41,6 +41,7 @@ struct rk_gmac_ops {
void (*set_to_rmii)(struct rk_priv_data *bsp_priv);
void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed);
void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed);
+   void (*internal_phy_powerup)(struct rk_priv_data *bsp_priv);
  };
  
  struct rk_priv_data {

@@ -52,6 +53,7 @@ struct rk_priv_data {
  
  	bool clk_enabled;

bool clock_input;
+   bool internal_phy;
  
  	struct clk *clk_mac;

struct clk *gmac_clkin;
@@ -61,6 +63,9 @@ struct rk_priv_data {
struct clk *clk_mac_refout;
struct clk *aclk_mac;
struct clk *pclk_mac;
+   struct clk *clk_macphy;
+
+   struct reset_control *macphy_reset;
  
  	int tx_delay;

int rx_delay;
@@ -750,6 +755,48 @@ static void rk3399_set_rmii_speed(struct rk_priv_data 
*bsp_priv, int speed)
.set_rmii_speed = rk3399_set_rmii_speed,
  };
  
+#define RK_GRF_MACPHY_CON0		0xb00

+#define RK_GRF_MACPHY_CON1 0xb04
+#define RK_GRF_MACPHY_CON2 0xb08
+#define RK_GRF_MACPHY_CON3 0xb0c
+
+#define RK_MACPHY_ENABLE   GRF_BIT(0)
+#define RK_MACPHY_DISABLE  GRF_CLR_BIT(0)
+#define RK_MACPHY_CFG_CLK_50M  GRF_BIT(14)
+#define RK_GMAC2PHY_RMII_MODE  (GRF_BIT(6) | GRF_CLR_BIT(7))
+#define RK_GRF_CON2_MACPHY_ID  HIWORD_UPDATE(0x1234, 0xffff, 0)
+#define RK_GRF_CON3_MACPHY_ID  HIWORD_UPDATE(0x35, 0x3f, 0)


These are primarily registers for the rk3328 and come from the GRF which is
somehow prone to chip-designers moving bits around in registers and also
especially the register offsets (*_CONx) will probably not stay the same
on future socs.



I think they should try to keep the same. But what you said is very 
reasonable. So let's give rk3228 and rk3328 different 
internal_phy_powerup() in the rk_gmac_ops to set their own configuration?





+static void rk_gmac_internal_phy_powerup(struct rk_priv_data *priv)
+{
+   if (priv->ops->internal_phy_powerup)
+   priv->ops->internal_phy_powerup(priv);
+
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_CFG_CLK_50M);
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_GMAC2PHY_RMII_MODE);
+
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON2, RK_GRF_CON2_MACPHY_ID);
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON3, RK_GRF_CON3_MACPHY_ID);
+
+   /* disable macphy, the default value is enabled */


that comment is not providing useful information, maybe
/* macphy needs to be disabled before trying to reset it */



+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE);
+   if (priv->macphy_reset)
+   reset_control_assert(priv->macphy_reset);
+

[net-next 10/16] net/mlx5: FPGA, Add SBU bypass and reset flows

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

The Innova FPGA includes shell hardware and Sandbox-Unit (SBU) hardware.
The shell hardware is handled by mlx5_core itself, while the SBU is
handled by a client driver.

Reset the SBU to a well-known initial state when initializing a new
device, and set the FPGA to bypass mode when uninitializing a device.
This allows the client driver to assume that its device has been
reset when a new device is detected.

During SBU reset, the FPGA is put into SBU-bypass mode. In this mode
packets do not pass through the SBU, so it cannot affect the network
data stream at all.

A factory-image does not have an SBU, so skip these flows.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c | 11 ++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h |  1 +
 .../net/ethernet/mellanox/mlx5/core/fpga/core.c| 40 ++
 include/linux/mlx5/mlx5_ifc_fpga.h |  9 +
 4 files changed, 61 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index 8308ccbad85a..a5fdb4cf0b9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -47,6 +47,17 @@ int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
MLX5_REG_FPGA_CAP, 0, 0);
 }
 
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+   u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+   u32 out[MLX5_ST_SZ_DW(fpga_ctrl)];
+
+   MLX5_SET(fpga_ctrl, in, operation, op);
+
+   return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+   MLX5_REG_FPGA_CTRL, 0, true);
+}
+
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
 {
u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
index b851580d846f..8943056163f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -67,6 +67,7 @@ struct mlx5_fpga_qp_counters {
 
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
 int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
 
 int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
u32 *fpga_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 7f859a3ad5d2..31e5a2627eb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -102,6 +102,29 @@ static int mlx5_fpga_device_load_check(struct 
mlx5_fpga_device *fdev)
return 0;
 }
 
+int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+   int err;
+   struct mlx5_core_dev *mdev = fdev->mdev;
+
+   err = mlx5_fpga_ctrl_op(mdev, 
MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+   if (err) {
+   mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
+   return err;
+   }
+   err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+   if (err) {
+   mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
+   return err;
+   }
+   err = mlx5_fpga_ctrl_op(mdev, 
MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+   if (err) {
+   mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
+   return err;
+   }
+   return 0;
+}
+
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
struct mlx5_fpga_device *fdev = mdev->fpga;
@@ -135,8 +158,17 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
if (err)
goto err_rsvd_gid;
 
+   if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+   err = mlx5_fpga_device_brb(fdev);
+   if (err)
+   goto err_conn_init;
+   }
+
goto out;
 
+err_conn_init:
+   mlx5_fpga_conn_device_cleanup(fdev);
+
 err_rsvd_gid:
mlx5_core_unreserve_gids(mdev, max_num_qps);
 out:
@@ -172,6 +204,7 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
struct mlx5_fpga_device *fdev = mdev->fpga;
unsigned int max_num_qps;
unsigned long flags;
+   int err;
 
if (!fdev)
return;
@@ -184,6 +217,13 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
fdev->state = MLX5_FPGA_STATUS_NONE;
spin_unlock_irqrestore(&fdev->state_lock, flags);
 
+   if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+   err = mlx5_fpga_ctrl_op(mdev, 
MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+   if (err)
+

[net-next 13/16] net/mlx5e: IPSec, Innova IPSec offload infrastructure

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Add Innova IPSec ESP crypto offload configuration paths.
Detect Innova IPSec device and set the NETIF_F_HW_ESP flag.
Configure Security Associations using the API introduced in a previous
patch.

Add Software-parser hardware descriptor layout
Software-Parser (swp) is a hardware feature in ConnectX which allows the
host software to specify protocol header offsets in the TX path, thus
overriding the hardware parser.
This is useful for protocols that the ASIC may not be able to parse on
its own.

Note that due to inline metadata, XDP is not supported in Innova IPSec.

Signed-off-by: Ilan Tayari 
Signed-off-by: Yossi Kuperman 
Signed-off-by: Yevgeny Kliteynik 
Signed-off-by: Boris Pismenny 
Signed-off-by: Saeed Mahameed 
---
 MAINTAINERS|  10 +
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|  12 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   3 +
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c   | 415 +
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h   |  78 
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  14 +
 include/linux/mlx5/mlx5_ifc.h  |   8 +-
 include/linux/mlx5/qp.h|  14 +-
 9 files changed, 552 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 71a74555afdf..c324460d5042 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8327,6 +8327,16 @@ Q:   http://patchwork.ozlabs.org/project/netdev/list/
 F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 F: include/linux/mlx5/mlx5_ifc_fpga.h
 
+MELLANOX ETHERNET INNOVA IPSEC DRIVER
+M: Ilan Tayari 
+R: Boris Pismenny 
+L: netdev@vger.kernel.org
+S: Supported
+W: http://www.mellanox.com
+Q: http://patchwork.ozlabs.org/project/netdev/list/
+F: drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
+F: drivers/net/ethernet/mellanox/mlx5/core/ipsec*
+
 MELLANOX ETHERNET SWITCH DRIVERS
 M: Jiri Pirko 
 M: Ido Schimmel 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig 
b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index d6c6cea8ebab..5aee05992f27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -52,3 +52,15 @@ config MLX5_CORE_IPOIB
default n
---help---
  MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_IPSEC
+   bool "IPSec XFRM cryptography-offload accelaration"
+   depends on MLX5_ACCEL
+   depends on MLX5_CORE_EN
+   depends on XFRM_OFFLOAD
+   depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+   default n
+   ---help---
+ Build support for IPsec cryptography-offload accelaration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 33557526f597..7e81084a75ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -19,3 +19,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o 
eswitch_offloads.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f93f44d1d1cf..535ffd78a34e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -784,6 +784,9 @@ struct mlx5e_priv {
 
const struct mlx5e_profile *profile;
void  *ppriv;
+#ifdef CONFIG_MLX5_EN_IPSEC
+   struct mlx5e_ipsec*ipsec;
+#endif
 };
 
 struct mlx5e_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
new file mode 100644
index 000000000000..06d9d6ad93ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *

[net-next 14/16] net/mlx5e: IPSec, Add Innova IPSec offload RX data path

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

In RX data path, the hardware prepends a special metadata ethertype
which indicates that the packet underwent decryption, and the result of
the authentication check.

Communicate this to the stack in skb->sp.

Make wqe_size large enough to account for the injected metadata.

Support only Linked-list RQ type.

IPSec offload RX packets may have useful CHECKSUM_COMPLETE information,
which the stack may not be able to use yet.

Signed-off-by: Ilan Tayari 
Signed-off-by: Yossi Kuperman 
Signed-off-by: Yevgeny Kliteynik 
Signed-off-by: Boris Pismenny 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c   |  19 +++
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h   |   9 ++
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c   | 135 +
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h   |  44 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  22 +++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c|  41 +++
 7 files changed, 269 insertions(+), 3 deletions(-)
 create mode 100644 
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
 create mode 100644 
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 7e81084a75ea..23cb8ba91e6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -20,4 +20,4 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
 
-mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 06d9d6ad93ad..bb69660893ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -40,6 +40,7 @@
 #include "en.h"
 #include "accel/ipsec.h"
 #include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
 
 struct mlx5e_ipsec_sa_entry {
struct hlist_node hlist; /* Item in SADB_RX hashtable */
@@ -49,6 +50,24 @@ struct mlx5e_ipsec_sa_entry {
void *context;
 };
 
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
+ unsigned int handle)
+{
+   struct mlx5e_ipsec_sa_entry *sa_entry;
+   struct xfrm_state *ret = NULL;
+
+   rcu_read_lock();
+   hash_for_each_possible_rcu(ipsec->sadb_rx, sa_entry, hlist, handle)
+   if (sa_entry->handle == handle) {
+   ret = sa_entry->x;
+   xfrm_state_hold(ret);
+   break;
+   }
+   rcu_read_unlock();
+
+   return ret;
+}
+
 static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index b9423a2873e2..4d745d3dd4b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -41,14 +41,23 @@
 #include 
 
 #define MLX5E_IPSEC_SADB_RX_BITS 10
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
 
 struct mlx5e_priv;
 
+struct mlx5e_ipsec_sw_stats {
+   atomic64_t ipsec_rx_drop_sp_alloc;
+   atomic64_t ipsec_rx_drop_sadb_miss;
+   atomic64_t ipsec_rx_drop_syndrome;
+};
+
 struct mlx5e_ipsec {
struct mlx5e_priv *en_priv;
DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
struct ida halloc;
+   struct mlx5e_ipsec_sw_stats sw_stats;
 };
 
 int mlx5e_ipsec_init(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
new file mode 100644
index ..56ab2e80553e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted

[net-next 11/16] net/mlx5: FPGA, Add SBU infrastructure

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Add interface to initialize and interact with Innova FPGA SBU
connections.
A client driver may use these functions to set up a high-speed DMA
connection with its SBU hardware logic, and send/receive messages
over this connection.

A later patch in this patchset will make use of these functions for
Innova IPSec offload in mlx5 Ethernet driver.

Add commands to retrieve Innova FPGA SBU capabilities, and to
read/write Innova FPGA configuration space registers and memory,
over internal I2C.

At a high level, the FPGA configuration space is divided as follows:
 0x00000000 - 0x007fffff is reserved for the SBU
 0x00800000 - 0xffffffff is reserved for the Shell
0x400000000 - ...        is DDR memory

A later patchset will add support for accessing FPGA CrSpace and memory
over a high-speed connection. This is the reason for the ACCESS_TYPE
enumeration, which currently only supports I2C.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c |  65 
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c | 164 +
 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h |  98 
 include/linux/mlx5/device.h|   3 +
 include/linux/mlx5/driver.h|   1 +
 include/linux/mlx5/mlx5_ifc.h  |   1 +
 include/linux/mlx5/mlx5_ifc_fpga.h |  13 ++
 9 files changed, 349 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5221b1235c47..676388fde239 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,7 +6,7 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o dev.o lib/gid.o
 
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index a5fdb4cf0b9c..5cb855fd618f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -38,6 +38,39 @@
 #include "mlx5_core.h"
 #include "fpga/cmd.h"
 
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+void *buf, bool write)
+{
+   u32 in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+   u32 out[MLX5_FPGA_ACCESS_REG_SZ];
+   int err;
+
+   if (size & 3)
+   return -EINVAL;
+   if (addr & 3)
+   return -EINVAL;
+   if (size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+   return -EINVAL;
+
+   MLX5_SET(fpga_access_reg, in, size, size);
+   MLX5_SET64(fpga_access_reg, in, address, addr);
+   if (write)
+   memcpy(MLX5_ADDR_OF(fpga_access_reg, in, data), buf, size);
+
+   err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+  MLX5_REG_FPGA_ACCESS_REG, 0, write);
+   if (err)
+   return err;
+
+   if (!write)
+   memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, out, data), size);
+
+   return 0;
+}
+
 int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
 {
u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
@@ -58,6 +91,38 @@ int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
MLX5_REG_FPGA_CTRL, 0, true);
 }
 
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+   unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+   u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+   unsigned int read;
+   int ret = 0;
+
+   if (cap_size > size) {
+   mlx5_core_warn(dev, "Not enough buffer %u for FPGA SBU caps %u",
+  size, cap_size);
+   return -EINVAL;
+   }
+
+   while (cap_size > 0) {
+   read = min_t(unsigned int, cap_size,
+MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+   ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+   if (ret) {
+   mlx5_core_warn(dev, "Error reading FPGA SBU caps %u 
bytes at address

[net-next 05/16] net/mlx5: Make get_cqe routine not ethernet-specific

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Move mlx5e_get_cqe routine to wq.h and rename it to
mlx5_cqwq_get_cqe.

This allows it to be used by other CQ users outside of the
ethernet driver code.

A later patch in this patchset will make use of it from
FPGA code for the FPGA high-speed connection.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 19 +--
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  | 17 +
 5 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index eef0a50e2388..f93f44d1d1cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -833,7 +833,6 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
 void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
 
 void mlx5e_rx_am(struct mlx5e_rq *rq);
 void mlx5e_rx_am_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5f3c138c948d..574a96279340 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -996,7 +996,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
for (; work_done < budget; work_done++) {
-   struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);
+   struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(>wq);
 
if (!cqe)
break;
@@ -1050,7 +1050,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
u16 wqe_counter;
bool last_wqe;
 
-   cqe = mlx5e_get_cqe(cq);
+   cqe = mlx5_cqwq_get_cqe(>wq);
if (!cqe)
break;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 0433d69429f3..ccec3b00e17c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -409,7 +409,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
u16 wqe_counter;
bool last_wqe;
 
-   cqe = mlx5e_get_cqe(cq);
+   cqe = mlx5_cqwq_get_cqe(>wq);
if (!cqe)
break;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 5ca6714e3e02..92db28a9ed43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -32,23 +32,6 @@
 
 #include "en.h"
 
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
-{
-   struct mlx5_cqwq *wq = >wq;
-   u32 ci = mlx5_cqwq_get_ci(wq);
-   struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
-   u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
-   u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
-
-   if (cqe_ownership_bit != sw_ownership_val)
-   return NULL;
-
-   /* ensure cqe content is read after cqe ownership bit */
-   dma_rmb();
-
-   return cqe;
-}
-
 static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
 struct mlx5e_icosq *sq,
 struct mlx5_cqe64 *cqe,
@@ -89,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, >state)))
return;
 
-   cqe = mlx5e_get_cqe(cq);
+   cqe = mlx5_cqwq_get_cqe(>wq);
if (likely(!cqe))
return;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h 
b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index d8afed898c31..9ded5d40ce6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -34,6 +34,7 @@
 #define __MLX5_WQ_H__
 
 #include 
+#include 
 
 struct mlx5_wq_param {
int linear;
@@ -146,6 +147,22 @@ static inline void mlx5_cqwq_update_db_record(struct 
mlx5_cqwq *wq)
*wq->db = cpu_to_be32(wq->cc & 0xff);
 }
 
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
+{
+   u32 ci = mlx5_cqwq_get_ci(wq);
+   struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
+   u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
+   u8

[net-next 02/16] net/mlx5: Add reserved-gids support

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Reserved GIDs are entries in the GID table in use by the mlx5_core
and its submodules (e.g. FPGA, SRIOV, E-Switch, netdev).
The entries are reserved at the high indexes of the GID table.

A mlx5 submodule may reserve a certain amount of GIDs for its own use
during the load sequence by calling mlx5_core_reserve_gids, and must
also take care to un-reserve these GIDs when it closes.
Reservation is only allowed during the load sequence and before any
interfaces (e.g. mlx5_ib or mlx5_en) are up.

After reservation, a submodule may call mlx5_core_reserved_gid_alloc/
free to allocate entries from the reserved GIDs pool.

Reserve a GID table entry for every supported FPGA QP.

A later patch in the patchset will remove them from being reported to
IB core.
Another such patch will make use of these for FPGA QPs in Innova NIC.

Add lib/mlx5.h to serve as a library for mlx5 submodules and to
expose only the public mlx5 API. More mlx5 library files will be added
in future submissions.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/fpga/core.c|  31 -
 .../net/ethernet/mellanox/mlx5/core/fpga/core.h|   5 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c  | 154 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h |  43 ++
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  11 +-
 include/linux/mlx5/driver.h|  17 +++
 7 files changed, 260 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5ad093a21a6e..738867bab21f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,7 +4,7 @@ subdir-ccflags-y += -I$(src)
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-   fs_counters.o rl.o lag.o dev.o
+   fs_counters.o rl.o lag.o dev.o lib/gid.o
 
 mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index d88b332e9669..92d8b1b6e598 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -35,6 +35,7 @@
 #include 
 
 #include "mlx5_core.h"
+#include "lib/mlx5.h"
 #include "fpga/core.h"
 
 static const char *const mlx5_fpga_error_strings[] = {
@@ -104,6 +105,7 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
 {
struct mlx5_fpga_device *fdev = mdev->fpga;
unsigned long flags;
+   unsigned int max_num_qps;
int err;
 
if (!fdev)
@@ -123,6 +125,9 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
   mlx5_fpga_image_name(fdev->last_oper_image),
   MLX5_CAP_FPGA(fdev->mdev, image_version));
 
+   max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+   err = mlx5_core_reserve_gids(mdev, max_num_qps);
+
 out:
spin_lock_irqsave(>state_lock, flags);
fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
@@ -151,9 +156,33 @@ int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
return 0;
 }
 
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+   struct mlx5_fpga_device *fdev = mdev->fpga;
+   unsigned int max_num_qps;
+   unsigned long flags;
+
+   if (!fdev)
+   return;
+
+   spin_lock_irqsave(>state_lock, flags);
+   if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+   spin_unlock_irqrestore(>state_lock, flags);
+   return;
+   }
+   fdev->state = MLX5_FPGA_STATUS_NONE;
+   spin_unlock_irqrestore(>state_lock, flags);
+
+   max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+   mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
 void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
 {
-   kfree(mdev->fpga);
+   struct mlx5_fpga_device *fdev = mdev->fpga;
+
+   mlx5_fpga_device_stop(mdev);
+   kfree(fdev);
mdev->fpga = NULL;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index c55044d66778..557d83973ade 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -71,6 +71,7 @@ struct mlx5_fpga_device {
 int mlx5_fpga_device_init(struct mlx5_core_dev *mdev);
 void mlx5_fpga_device_cleanup(struct

[net-next 12/16] net/mlx5: Accel, Add IPSec acceleration interface

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Add routines for manipulating the hardware IPSec SA database (SADB).

In Innova IPSec, a Security Association (SA) is added or deleted
via a command message over the SBU connection.
The HW then sends a response message over the same connection.

Add implementation for Innova IPSec (FPGA-based) hardware.

These routines will be used by the IPSec offload support in a later patch.
However, they may also be used by others, such as RDMA and RoCE IPSec.

mlx5/accel is a middle acceleration layer to allow mlx5e and other ULPs
to work directly with mlx5_core rather than Innova FPGA or other mlx5
acceleration providers.

In this patchset we add Innova IPSec support and mlx5/accel delegates
IPSec offloads to Innova routines.

In the future, when IPSec/TLS or any other acceleration gets integrated
into ConnectX chip, mlx5/accel layer will provide the integrated
acceleration, rather than the Innova one.

Signed-off-by: Ilan Tayari 
Signed-off-by: Boris Pismenny 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig|   4 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   5 +-
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.c  |  78 +
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.h  | 138 
 .../net/ethernet/mellanox/mlx5/core/fpga/core.h|   2 +
 .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.c   | 376 +
 .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.h   |  94 ++
 drivers/net/ethernet/mellanox/mlx5/core/main.c |   9 +
 include/linux/mlx5/mlx5_ifc_fpga.h |  67 
 9 files changed, 772 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig 
b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index cf1ef48bfd8d..d6c6cea8ebab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -11,9 +11,13 @@ config MLX5_CORE
  Core driver for low level functionality of the ConnectX-4 and
  Connect-IB cards by Mellanox Technologies.
 
+config MLX5_ACCEL
+   bool
+
 config MLX5_FPGA
 bool "Mellanox Technologies Innova support"
 depends on MLX5_CORE
+   select MLX5_ACCEL
 ---help---
   Build support for the Innova family of network cards by Mellanox
   Technologies. Innova network cards are comprised of a ConnectX chip
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 676388fde239..33557526f597 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,7 +6,10 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o dev.o lib/gid.o
 
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
+
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o 
\
+   fpga/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c 
b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
new file mode 100644
index ..53e69edaedde
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT

[net-next 01/16] net/mlx5: Set interface flags before cleanup in unload_one

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

In load_one, the interface flags are changed from down to up,
only after initializing the interfaces.
In unload_one, the flags are changed from up to down before the
interface cleanup.

Change the cleanup order to be opposite to initialization order.

This fixes flag consistency between init and cleanup.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c7f75e12c13b..9a5a475d9e00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1254,6 +1254,9 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv,
goto out;
}
 
+   clear_bit(MLX5_INTERFACE_STATE_UP, >intf_state);
+   set_bit(MLX5_INTERFACE_STATE_DOWN, >intf_state);
+
if (mlx5_device_registered(dev))
mlx5_detach_device(dev);
 
@@ -1282,8 +1285,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv,
mlx5_cmd_cleanup(dev);
 
 out:
-   clear_bit(MLX5_INTERFACE_STATE_UP, >intf_state);
-   set_bit(MLX5_INTERFACE_STATE_DOWN, >intf_state);
mutex_unlock(>intf_state_mutex);
return err;
 }
-- 
2.11.0

[net-next 16/16] net/mlx5e: IPSec, Add IPSec ethtool stats

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Add Innova IPSec SBU counters to the ethtool -S stats.
Add IPSec offload error counters to the ethtool -S stats.

Signed-off-by: Ilan Tayari 
Reviewed-by: Boris Pismenny 
Reviewed-by: Gal Pressman 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +-
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h   |  43 +++
 .../mellanox/mlx5/core/en_accel/ipsec_stats.c  | 133 +
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  10 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   4 +-
 5 files changed, 190 insertions(+), 3 deletions(-)
 create mode 100644 
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 23cb8ba91e6f..ca367445f864 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -20,4 +20,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
 
-mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
+   en_accel/ipsec_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index ffc90b3c6ac7..56e00baf16cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -57,12 +57,30 @@ struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_tx_drop_metadata;
 };
 
+struct mlx5e_ipsec_stats {
+   u64 ipsec_dec_in_packets;
+   u64 ipsec_dec_out_packets;
+   u64 ipsec_dec_bypass_packets;
+   u64 ipsec_enc_in_packets;
+   u64 ipsec_enc_out_packets;
+   u64 ipsec_enc_bypass_packets;
+   u64 ipsec_dec_drop_packets;
+   u64 ipsec_dec_auth_fail_packets;
+   u64 ipsec_enc_drop_packets;
+   u64 ipsec_add_sa_success;
+   u64 ipsec_add_sa_fail;
+   u64 ipsec_del_sa_success;
+   u64 ipsec_del_sa_fail;
+   u64 ipsec_cmd_drop;
+};
+
 struct mlx5e_ipsec {
struct mlx5e_priv *en_priv;
DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS);
spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
struct ida halloc;
struct mlx5e_ipsec_sw_stats sw_stats;
+   struct mlx5e_ipsec_stats stats;
 };
 
 void mlx5e_ipsec_build_inverse_table(void);
@@ -70,6 +88,11 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv);
 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
 
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
+
 struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
  unsigned int handle);
 
@@ -92,6 +115,26 @@ static inline void mlx5e_ipsec_build_netdev(struct 
mlx5e_priv *priv)
 {
 }
 
+static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+   return 0;
+}
+
+static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
+ uint8_t *data)
+{
+   return 0;
+}
+
+static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+}
+
+static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+   return 0;
+}
+
 #endif
 
 #endif /* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
new file mode 100644
index ..6fea59223dc4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the

[net-next 08/16] net/mlx5: FPGA, Add FW commands for FPGA QPs

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

The FPGA QP is a high-bandwidth communication channel between the host
CPU and the FPGA device. It allows performing DMA operations between
host memory and the FPGA logic via the ConnectX chip.

Add ConnectX FW commands which create and manipulate FPGA QPs.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |  10 ++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c |  98 ++
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h |  21 +++
 include/linux/mlx5/mlx5_ifc.h  |   5 +
 include/linux/mlx5/mlx5_ifc_fpga.h | 199 +
 5 files changed, 333 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 4d5bd01f1ebb..f5a2c605749f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -307,6 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev 
*dev, u16 op,
case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+   case MLX5_CMD_OP_FPGA_DESTROY_QP:
return MLX5_CMD_STAT_OK;
 
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,6 +420,10 @@ static int mlx5_internal_err_ret_value(struct 
mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+   case MLX5_CMD_OP_FPGA_CREATE_QP:
+   case MLX5_CMD_OP_FPGA_MODIFY_QP:
+   case MLX5_CMD_OP_FPGA_QUERY_QP:
+   case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -585,6 +590,11 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+   MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+   MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+   MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+   MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+   MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
default: return "unknown command opcode";
}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index 99cba644b4fc..8308ccbad85a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "mlx5_core.h"
 #include "fpga/cmd.h"
@@ -62,3 +63,100 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct 
mlx5_fpga_query *query)
query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
return 0;
 }
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+   u32 *fpga_qpn)
+{
+   u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
+   u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+   int ret;
+
+   MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+   memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
+  MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+   ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   if (ret)
+   return ret;
+
+   memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, out, fpga_qpc),
+  MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+   *fpga_qpn = MLX5_GET(fpga_create_qp_out, out, fpga_qpn);
+   return ret;
+}
+
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+   enum mlx5_fpga_qpc_field_select fields,
+   void *fpga_qpc)
+{
+   u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
+   u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+
+   MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+   MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
+   MLX5_SET(fpga_modify_qp_in, in, fpga_qpn, fpga_qpn);
+   memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
+  MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+   return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+  u32 fpga_qpn, void *fpga_qpc)
+{
+   u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
+   u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+   int ret;
+
+   MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+   MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
+
+   ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+   if (ret)
+

[net-next 06/16] net/mlx5: Add QP WQ support

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

A QP in ConnectX is a concatenation of RQ and SQ which share a QP-number
and work together.
Add support for allocating and managing the work-queue buffer for a QP, in
a similar way to how SQs and RQs are already supported.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c | 46 
 drivers/net/ethernet/mellanox/mlx5/core/wq.h | 10 ++
 2 files changed, 56 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 921673c42bc9..6bcfc25350f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -54,6 +54,12 @@ static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
return mlx5_wq_cyc_get_size(wq) << wq->log_stride;
 }
 
+static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
+{
+   return mlx5_wq_cyc_get_byte_size(>rq) +
+  mlx5_wq_cyc_get_byte_size(>sq);
+}
+
 static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
 {
return mlx5_cqwq_get_size(wq) << wq->log_stride;
@@ -99,6 +105,46 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct 
mlx5_wq_param *param,
return err;
 }
 
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl)
+{
+   int err;
+
+   wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+   wq->rq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_rq_size)) - 1;
+
+   wq->sq.log_stride = ilog2(MLX5_SEND_WQE_BB);
+   wq->sq.sz_m1 = (1 << MLX5_GET(qpc, qpc, log_sq_size)) - 1;
+
+   err = mlx5_db_alloc_node(mdev, _ctrl->db, param->db_numa_node);
+   if (err) {
+   mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+   return err;
+   }
+
+   err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+ _ctrl->buf, param->buf_numa_node);
+   if (err) {
+   mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+   goto err_db_free;
+   }
+
+   wq->rq.buf = wq_ctrl->buf.direct.buf;
+   wq->sq.buf = wq->rq.buf + mlx5_wq_cyc_get_byte_size(>rq);
+   wq->rq.db  = _ctrl->db.db[MLX5_RCV_DBR];
+   wq->sq.db  = _ctrl->db.db[MLX5_SND_DBR];
+
+   wq_ctrl->mdev = mdev;
+
+   return 0;
+
+err_db_free:
+   mlx5_db_free(mdev, _ctrl->db);
+
+   return err;
+}
+
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 void *cqc, struct mlx5_cqwq *wq,
 struct mlx5_frag_wq_ctrl *wq_ctrl)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h 
b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 9ded5d40ce6b..718589d0cec2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -35,6 +35,7 @@
 
 #include 
 #include 
+#include 
 
 struct mlx5_wq_param {
int linear;
@@ -61,6 +62,11 @@ struct mlx5_wq_cyc {
u8  log_stride;
 };
 
+struct mlx5_wq_qp {
+   struct mlx5_wq_cyc  rq;
+   struct mlx5_wq_cyc  sq;
+};
+
 struct mlx5_cqwq {
struct mlx5_frag_buffrag_buf;
__be32  *db;
@@ -88,6 +94,10 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct 
mlx5_wq_param *param,
   struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
 
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ void *qpc, struct mlx5_wq_qp *wq,
+ struct mlx5_wq_ctrl *wq_ctrl);
+
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 void *cqc, struct mlx5_cqwq *wq,
 struct mlx5_frag_wq_ctrl *wq_ctrl);
-- 
2.11.0

[net-next 07/16] net/mlx5: FPGA, Move FPGA init/cleanup to init_once

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

The FPGA init and cleanup routines should be called just once per
device.
Move them to the init_once and cleanup_once routines.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h |  8 
 drivers/net/ethernet/mellanox/mlx5/core/main.c  | 21 ++---
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 92d8b1b6e598..c3bb4b865f01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -135,7 +135,7 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
return err;
 }
 
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+int mlx5_fpga_init(struct mlx5_core_dev *mdev)
 {
struct mlx5_fpga_device *fdev = NULL;
 
@@ -177,7 +177,7 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
mlx5_core_unreserve_gids(mdev, max_num_qps);
 }
 
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
 {
struct mlx5_fpga_device *fdev = mdev->fpga;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 557d83973ade..db1d22c356e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -68,20 +68,20 @@ struct mlx5_fpga_device {
 #define mlx5_fpga_info(__adev, format, ...) \
dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
 
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
 void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
 
 #else
 
-static inline int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev)
 {
return 0;
 }
 
-static inline void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
 {
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 55f9fccfc394..684612778677 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -959,8 +959,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv)
goto err_eswitch_cleanup;
}
 
+   err = mlx5_fpga_init(dev);
+   if (err) {
+   dev_err(>dev, "Failed to init fpga device %d\n", err);
+   goto err_sriov_cleanup;
+   }
+
return 0;
 
+err_sriov_cleanup:
+   mlx5_sriov_cleanup(dev);
 err_eswitch_cleanup:
 #ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -984,6 +992,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct 
mlx5_priv *priv)
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+   mlx5_fpga_cleanup(dev);
mlx5_sriov_cleanup(dev);
 #ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1121,16 +1130,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv,
goto err_disable_msix;
}
 
-   err = mlx5_fpga_device_init(dev);
-   if (err) {
-   dev_err(>dev, "fpga device init failed %d\n", err);
-   goto err_put_uars;
-   }
-
err = mlx5_start_eqs(dev);
if (err) {
dev_err(>dev, "Failed to start pages and async EQs\n");
-   goto err_fpga_init;
+   goto err_put_uars;
}
 
err = alloc_comp_eqs(dev);
@@ -1205,9 +1208,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv,
 err_stop_eqs:
mlx5_stop_eqs(dev);
 
-err_fpga_init:
-   mlx5_fpga_device_cleanup(dev);
-
 err_put_uars:
mlx5_put_uars_page(dev, priv->uar);
 
@@ -1277,7 +1277,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, 
struct mlx5_priv *priv,
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
-   mlx5_fpga_device_cleanup(dev);
mlx5_put_uars_page(dev, priv->uar);
mlx5_disable_msix(dev);
if (cleanup)
-- 
2.11.0

[net-next 04/16] IB/mlx5: Respect mlx5_core reserved GIDs

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Reserved gids are taken by the mlx5_core, report smaller GID table
size to IB core.

Set mlx5_query_roce_port's return value back to int. In case of
error, return an indication. This rolls back some of the change
in commit 50f22fd8ecf9 ("IB/mlx5: Set mlx5_query_roce_port's return value to 
void")

Change set_roce_addr to use gid_set function, instead of directly
sending the command.

Signed-off-by: Ilan Tayari 
Reviewed-by: Leon Romanovsky 
Signed-off-by: Saeed Mahameed 
---
 drivers/infiniband/hw/mlx5/main.c | 119 +-
 1 file changed, 53 insertions(+), 66 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c 
b/drivers/infiniband/hw/mlx5/main.c
index 9f7e18612322..dc2f59e33971 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -223,8 +223,8 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 
*active_speed,
return 0;
 }
 
-static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
-struct ib_port_attr *props)
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+   struct ib_port_attr *props)
 {
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -232,12 +232,14 @@ static void mlx5_query_port_roce(struct ib_device 
*device, u8 port_num,
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
u32 eth_prot_oper;
+   int err;
 
/* Possible bad flows are checked before filling out props so in case
 * of an error it will still be zeroed out.
 */
-   if (mlx5_query_port_eth_proto_oper(mdev, _prot_oper, port_num))
-   return;
+   err = mlx5_query_port_eth_proto_oper(mdev, _prot_oper, port_num);
+   if (err)
+   return err;
 
translate_eth_proto_oper(eth_prot_oper, >active_speed,
 >active_width);
@@ -258,7 +260,7 @@ static void mlx5_query_port_roce(struct ib_device *device, 
u8 port_num,
 
ndev = mlx5_ib_get_netdev(device, port_num);
if (!ndev)
-   return;
+   return 0;
 
if (mlx5_lag_is_active(dev->mdev)) {
rcu_read_lock();
@@ -281,75 +283,49 @@ static void mlx5_query_port_roce(struct ib_device 
*device, u8 port_num,
dev_put(ndev);
 
props->active_mtu   = min(props->max_mtu, ndev_ib_mtu);
+   return 0;
 }
 
-static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
-const struct ib_gid_attr *attr,
-void *mlx5_addr)
+static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
+unsigned int index, const union ib_gid *gid,
+const struct ib_gid_attr *attr)
 {
-#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
-   char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
-  source_l3_address);
-   void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
-  source_mac_47_32);
-
-   if (!gid)
-   return;
+   enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+   u8 roce_version = 0;
+   u8 roce_l3_type = 0;
+   bool vlan = false;
+   u8 mac[ETH_ALEN];
+   u16 vlan_id = 0;
 
-   ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+   if (gid) {
+   gid_type = attr->gid_type;
+   ether_addr_copy(mac, attr->ndev->dev_addr);
 
-   if (is_vlan_dev(attr->ndev)) {
-   MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
-   MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+   if (is_vlan_dev(attr->ndev)) {
+   vlan = true;
+   vlan_id = vlan_dev_vlan_id(attr->ndev);
+   }
}
 
-   switch (attr->gid_type) {
+   switch (gid_type) {
case IB_GID_TYPE_IB:
-   MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+   roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
-   MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+   roce_version = MLX5_ROCE_VERSION_2;
+   if (ipv6_addr_v4mapped((void *)gid))
+   roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
+   else
+   roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
break;
 
default:
-   WARN_ON(true);
+   mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
}
 
-   if (attr->gid_type != IB_GID_TYPE_IB) {
-   if (ipv6_addr_v4mapped((void *)gid))
-

[net-next 03/16] net/mlx5: Add support for multiple RoCE enable

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

Previously, only mlx5_ib enabled RoCE on the port, but FPGA needs it as
well.
Add support for counting number of enables, so that FPGA and IB can work
in parallel and independently.
Program the HW to enable RoCE on the first enable call, and program to
disable RoCE on the last disable call.

Signed-off-by: Ilan Tayari 
Reviewed-by: Boris Pismenny 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 4 
 include/linux/mlx5/driver.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c 
b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 06019d00ab7b..5abfec1c3399 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -926,12 +926,16 @@ static int mlx5_nic_vport_update_roce_state(struct 
mlx5_core_dev *mdev,
 
 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
 {
+   if (atomic_inc_return(>roce.roce_en) != 1)
+   return 0;
return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
 
 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
 {
+   if (atomic_dec_return(>roce.roce_en) != 0)
+   return 0;
return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 08e99bd2cd77..32b0835d4491 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -777,6 +777,7 @@ struct mlx5_core_dev {
struct mlx5e_resources  mlx5e_res;
struct {
struct mlx5_rsvd_gids   reserved_gids;
+   atomic_troce_en;
} roce;
 #ifdef CONFIG_MLX5_FPGA
struct mlx5_fpga_device *fpga;
-- 
2.11.0

[pull request][net-next 00/16] Mellanox, mlx5 Innova IPsec offload

2017-06-27 Thread Saeed Mahameed

Hi Dave,

This series from Ilan provides the support for IPsec XFRM offload
in mlx5 drivers for Innova devices.

For more details please see tag log from Ilan below.

Please pull and let me know if there's any problem.

Thanks,
Saeed.

---

The following changes since commit 593814d1beae8ad91be6c90f95764e09fc7ca236:

  net/mlx4: fix spelling mistake: "coalesing" -> "coalescing" (2017-06-26 
23:18:29 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git 
tags/mlx5-updates-2017-06-27

for you to fetch changes up to 164f16f7021406795729916e100c7edd53ae954f:

  net/mlx5e: IPSec, Add IPSec ethtool stats (2017-06-27 16:36:48 +0300)


mlx5-updates-2017-06-27 (Innova IPsec offload support)

This patchset adds support for Innova IPSec network interface card.

About Innova device:

Innova is a network card with a ConnectX chip and an FPGA chip as a
 bump-on-the-wire.

             Internal
+----------+   Link   +-----------------+
|          +----------+      FPGA       |  +------+
| ConnectX |          |      Shell      +--+ QSFP |
|          +----------+    +-------+    |  | Port |
+----------+   I2C    |    |  SBU  |    |  +------+
                      |    +-------+    |
                      +--+----------+---+
                         |          |
                      +--+--+   +---+---+
                      | DDR |   | Flash |
                      +-----+   +-------+

The FPGA synthesized logic is loaded from dedicated flash storage and has
 access to its own dedicated DDR RAM.
The ConnectX chip firmware programs the FPGA by accessing its configuration
space over either the slow internal I2C link or the high-speed internal link.

The FPGA logic is divided into a "Shell" and a "Sandbox Unit" (SBU).
mlx5_core driver (with CONFIG_MLX5_FPGA) handles all shell functionality,
while other components may handle the various SBU functionalities.

The driver opens high-speed reliable communication channels with the shell and
the SBU over the internal link.
These channels may be used for high-bandwidth configuration or for SBU-specific
out-of-band data paths.

About Innova IPSec device:
--
Innova IPSec is a network card that allows offloading IPSec cryptography 
operations
from the host CPU to the NIC. It is an Innova card with an IPSec SBU.
The hardware keeps the database of IPSec Security Associations (SADB) in the 
FPGA's
DDR memory.

               Internal
+----------+     Link     +-----------------+
|          +--------------+      FPGA       |  +------+
| ConnectX |              |      Shell      +--+ QSFP |
|          +--------------+    +-------+    |  | Port |
+----------+ Internal I2C |    | IPSec |    |  +------+
                          |    |  SBU  |    |
                          |    +-------+    |
                          +--+----------+---+
                             |          |
                          +--+--+   +---+---+
                          | DDR |   |       |
                          |     |   | Flash |
                          |SADB |   |       |
                          +-----+   +-------+

Modes and ciphers:
Currently the following modes and ciphers are supported:
IPv4 and IPv6
ESP tunnel and transport modes
AES 128 and 256 bit encryption, with GCM authentication (RFC4106)

IV is generated using seqiv, in sync with Linux's geniv.

More modes and ciphers may be added later.

Notes:
In the future similar functionality will be included in a single-chip NIC.

About the driver:
-
Patches 1-4 prepare some existing driver code for the new feature:
  * Add support for reserved GIDs in the hardware GID table
  * Allow multiple modules to enable hardware RoCE support independently
Patches 5-6 define structs and helper functions for QP work-queues.
Patches 7-11 add various FPGA-related features required for Innova
IPSec.
Patch 12 adds an abstraction layer for Mellanox IPSec-offload capable devices.
Patches 13-16 add IPSec offload support to the mlx5 netdevice.

This driver services the new IPSec offload API introduced in commit
d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API")

Configuration Path:
If Innova IPSec device is detected, the mlx5e netdevice gets the new
NETIF_F_HW_ESP feature and the xdo callbacks, indicating ESP offload
capabilities, and also the matching TX checksum and GSO features.

The driver configures offloaded Security Associations (SAs) by sending
an ADD_SA or DEL_SA message to the IPSec SBU, which updates the SADB in DDR.
These messages and their responses are sent over a high-speed channel.
Counters for ethtool are retrieved by the driver from the SBU.

Data path:
On receive path, the SBU decrypts ESP packets which match the offloaded SADB,
but keeps them encapsulated.
The SBU injects metadata (Mellanox owned ethertype) indicating

[net-next 09/16] net/mlx5: FPGA, Add high-speed connection routines

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

An FPGA high-speed connection has two endpoints, an FPGA QP and a
ConnectX QP.
Add library routines to create and connect the endpoints of an
FPGA high-speed connection.

These routines allow creating and interacting with both types of
connections: Shell and Sandbox Unit (SBU).

Shell connection provides an interface to the FPGA's address space,
which includes the configuration space and the DDR.
Use of the shell connection will be introduced in a later patchset.

SBU connection provides a command and/or data interface to the
application-specific logic within the FPGA.
Use of the SBU connection will be introduced in a later patch in
this patchset.

Some struct definitions are added to a new header file sdk.h, which
will be extended in later patches in the patchset.
This header file will contain the in-kernel FPGA client driver API.

Signed-off-by: Ilan Tayari 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |2 +-
 .../net/ethernet/mellanox/mlx5/core/fpga/conn.c| 1042 
 .../net/ethernet/mellanox/mlx5/core/fpga/conn.h|   96 ++
 .../net/ethernet/mellanox/mlx5/core/fpga/core.c|   12 +
 .../net/ethernet/mellanox/mlx5/core/fpga/core.h|7 +
 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h |  106 ++
 6 files changed, 1264 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile 
b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 738867bab21f..5221b1235c47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,7 +6,7 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o 
pagealloc.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o dev.o lib/gid.o
 
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
new file mode 100644
index ..c4392f741c5f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -0,0 +1,1042 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include 
+#include 
+#include 
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/conn.h"
+
+#define MLX5_FPGA_PKEY 0x
+#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0x is always at index 0 */
+#define MLX5_FPGA_RECV_SIZE 2048
+#define MLX5_FPGA_PORT_NUM 1
+#define MLX5_FPGA_CQ_BUDGET 64
+
+static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
+ struct mlx5_fpga_dma_buf *buf)
+{
+   struct device *dma_device;
+   int err = 0;
+
+   if (unlikely(!buf->sg[0].data))
+   goto out;
+
+   dma_device = >fdev->mdev->pdev->dev;
+   buf->sg[0].dma_addr = dma_map_single(dma_device, buf->sg[0].data,
+buf->sg[0].size, buf->dma_dir);
+   err = dma_mapping_error(dma_device, buf->sg[0].dma_addr);
+   if (unlikely(err)) {
+

[net-next 15/16] net/mlx5e: IPSec, Add Innova IPSec offload TX data path

2017-06-27 Thread Saeed Mahameed

From: Ilan Tayari 

In the TX data path, prepend a special metadata ethertype which
instructs the hardware to perform cryptography.

In addition, fill Software-Parser segment in TX descriptor so
that the hardware may parse the ESP protocol, and perform TX
checksum offload on the inner payload.

Support GSO, by providing the inverse of gso_size in the metadata.
This allows the FPGA to update the ESP header (seqno and seqiv) on the
resulting packets, by calculating the packet number within the GSO
back from the TCP sequence number.

Note that for GSO SKBs, the stack does not include an ESP trailer,
unlike the non-GSO case.

Signed-off-by: Ilan Tayari 
Signed-off-by: Yossi Kuperman 
Signed-off-by: Yevgeny Kliteynik 
Signed-off-by: Boris Pismenny 
Signed-off-by: Saeed Mahameed 
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h   |   1 +
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c   |  27 +++
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h   |  10 +
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c   | 243 +
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h   |  13 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   9 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c|  25 ++-
 7 files changed, 319 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 535ffd78a34e..e1b7ddfecd01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -328,6 +328,7 @@ struct mlx5e_sq_dma {
 
 enum {
MLX5E_SQ_STATE_ENABLED,
+   MLX5E_SQ_STATE_IPSEC,
 };
 
 struct mlx5e_sq_wqe_info {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index bb69660893ee..bac5103efad3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -399,10 +399,26 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
priv->ipsec = NULL;
 }
 
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+   if (x->props.family == AF_INET) {
+   /* Offload with IPv4 options is not supported yet */
+   if (ip_hdr(skb)->ihl > 5)
+   return false;
+   } else {
+   /* Offload with IPv6 extension headers is not support yet */
+   if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+   return false;
+   }
+
+   return true;
+}
+
 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
.xdo_dev_state_add  = mlx5e_xfrm_add_state,
.xdo_dev_state_delete   = mlx5e_xfrm_del_state,
.xdo_dev_state_free = mlx5e_xfrm_free_state,
+   .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
 };
 
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
@@ -431,4 +447,15 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
 
netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+   if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+   !MLX5_CAP_ETH(mdev, swp_lso)) {
+   mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+   return;
+   }
+
+   mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+   netdev->features |= NETIF_F_GSO_ESP;
+   netdev->hw_features |= NETIF_F_GSO_ESP;
+   netdev->hw_enc_features |= NETIF_F_GSO_ESP;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 4d745d3dd4b1..ffc90b3c6ac7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -50,6 +50,11 @@ struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_rx_drop_sp_alloc;
atomic64_t ipsec_rx_drop_sadb_miss;
atomic64_t ipsec_rx_drop_syndrome;
+   atomic64_t ipsec_tx_drop_bundle;
+   atomic64_t ipsec_tx_drop_no_state;
+   atomic64_t ipsec_tx_drop_not_ip;
+   atomic64_t ipsec_tx_drop_trailer;
+   atomic64_t ipsec_tx_drop_metadata;
 };
 
 struct mlx5e_ipsec {
@@ -60,6 +65,7 @@ struct mlx5e_ipsec {
struct mlx5e_ipsec_sw_stats sw_stats;
 };
 
+void mlx5e_ipsec_build_inverse_table(void);
 int mlx5e_ipsec_init(struct mlx5e_priv *priv);
 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
@@ -69,6 +75,10 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct 
mlx5e_ipsec *dev,
 
 #else
 
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
 static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 {
return 0;
diff --git

Re: [PATCH 05/11] net: stmmac: dwmac-rk: Add internal phy support

2017-06-27 Thread David.Wu


Hi Florian,

Sorry for the late reply.

在 2017/6/24 0:22, Florian Fainelli 写道:

On 06/22/2017 09:59 PM, David Wu wrote:

To make internal phy worked, need to configure the phy_clock,
phy cru_reset and related registers.

Change-Id: I6971c0a769754b824b1b908b56080cbaf7867d13
Signed-off-by: David Wu 
---
  .../devicetree/bindings/net/rockchip-dwmac.txt |  3 +
  drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 82 ++
  2 files changed, 85 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt 
b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index 8f42755..0514f69 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -22,6 +22,7 @@ Required properties:
   < SCLK_MACREF_OUT> clock gate for RMII reference clock output
   < ACLK_GMAC>: AXI clock gate for GMAC
   < PCLK_GMAC>: APB clock gate for GMAC
+  < MAC_PHY>: clock for internal macphy
   - clock-names: One name for each entry in the clocks property.
   - phy-mode: See ethernet.txt file in the same directory.
   - pinctrl-names: Names corresponding to the numbered pinctrl states.
@@ -35,6 +36,8 @@ Required properties:
   - assigned-clocks: main clock, should be < SCLK_MAC>;
   - assigned-clock-parents = parent of main clock.
 can be <_gmac> or < SCLK_MAC_PLL>.
+ - phy-type: For internal phy, it must be "internal"; For external phy, no need
+   to configure this.


Use the standard "phy-mode" property. You will see
drivers/net/ethernet/broadcom/genet/ actually define a phy-mode =
"internal" property specifically for that. This should probably be
generalized so it is useful to other drivers as well, I will do just that.



I'm a little confused about the phy-mode = "internal" property.
If the phy-mode property is configured as "internal" in the DT, I
could not get the rmii or rgmii mode for the phy.

I use it to differentiate rmii or rgmii for different configuration.

  
  Optional properties:

   - tx_delay: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as 
default.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index a8e8fd5..c1a1413 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -41,6 +41,7 @@ struct rk_gmac_ops {
void (*set_to_rmii)(struct rk_priv_data *bsp_priv);
void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed);
void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed);
+   void (*internal_phy_powerup)(struct rk_priv_data *bsp_priv);
  };
  
  struct rk_priv_data {

@@ -52,6 +53,7 @@ struct rk_priv_data {
  
  	bool clk_enabled;

bool clock_input;
+   bool internal_phy;
  
  	struct clk *clk_mac;

struct clk *gmac_clkin;
@@ -61,6 +63,9 @@ struct rk_priv_data {
struct clk *clk_mac_refout;
struct clk *aclk_mac;
struct clk *pclk_mac;
+   struct clk *clk_macphy;
+
+   struct reset_control *macphy_reset;
  
  	int tx_delay;

int rx_delay;
@@ -750,6 +755,48 @@ static void rk3399_set_rmii_speed(struct rk_priv_data 
*bsp_priv, int speed)
.set_rmii_speed = rk3399_set_rmii_speed,
  };
  
+#define RK_GRF_MACPHY_CON0		0xb00

+#define RK_GRF_MACPHY_CON1 0xb04
+#define RK_GRF_MACPHY_CON2 0xb08
+#define RK_GRF_MACPHY_CON3 0xb0c
+
+#define RK_MACPHY_ENABLE   GRF_BIT(0)
+#define RK_MACPHY_DISABLE  GRF_CLR_BIT(0)
+#define RK_MACPHY_CFG_CLK_50M  GRF_BIT(14)
+#define RK_GMAC2PHY_RMII_MODE  (GRF_BIT(6) | GRF_CLR_BIT(7))
+#define RK_GRF_CON2_MACPHY_ID  HIWORD_UPDATE(0x1234, 0x, 0)
+#define RK_GRF_CON3_MACPHY_ID  HIWORD_UPDATE(0x35, 0x3f, 0)
+
+static void rk_gmac_internal_phy_powerup(struct rk_priv_data *priv)
+{
+   if (priv->ops->internal_phy_powerup)
+   priv->ops->internal_phy_powerup(priv);
+
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_CFG_CLK_50M);
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_GMAC2PHY_RMII_MODE);
+
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON2, RK_GRF_CON2_MACPHY_ID);
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON3, RK_GRF_CON3_MACPHY_ID);
+
+   /* disable macphy, the default value is enabled */
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE);
+   if (priv->macphy_reset)
+   reset_control_assert(priv->macphy_reset);
+   usleep_range(10, 20);
+   if (priv->macphy_reset)
+   reset_control_deassert(priv->macphy_reset);
+   usleep_range(10, 20);
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_ENABLE);
+   msleep(30);
+}
+
+static void rk_gmac_internal_phy_powerdown(struct rk_priv_data *priv)
+{
+   regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE);

Re: [v2] brcmfmac: Fix a memory leak in error handling path in 'brcmf_cfg80211_attach'

2017-06-27 Thread Kalle Valo

Christophe Jaillet  wrote:

> If 'wiphy_new()' fails, we leak 'ops'. Add a new label in the error
> handling path to free it in such a case.
> 
> Cc: sta...@vger.kernel.org
> Fixes: 5c22fb85102a7 ("brcmfmac: add wowl gtk rekeying offload support")
> Signed-off-by: Christophe JAILLET 

Patch applied to wireless-drivers-next.git, thanks.

57c00f2fac51 brcmfmac: Fix a memory leak in error handling path in 
'brcmf_cfg80211_attach'

-- 
https://patchwork.kernel.org/patch/9800763/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

[PATCH net] net: prevent sign extension in dev_get_stats()

2017-06-27 Thread Eric Dumazet

From: Eric Dumazet 

Similar to the fix provided by Dominik Heidler in commit
9b3dc0a17d73 ("l2tp: cast l2tp traffic counter to unsigned")
we need to take care of 32bit kernels in dev_get_stats().

When using atomic_long_read(), we add a 'long' to u64 and
might misinterpret high order bit, unless we cast to unsigned.

Fixes: caf586e5f23ce ("net: add a core netdev->rx_dropped counter")
Fixes: 015f0688f57ca ("net: net: add a core netdev->tx_dropped counter")
Fixes: 6e7333d315a76 ("net: add rx_nohandler stat counter")
Signed-off-by: Eric Dumazet 
Cc: Jarod Wilson 
---
Note: I will provide similar other fixes in networking tree.

 net/core/dev.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 7243421c9783..91bb55070533 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7783,9 +7783,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device 
*dev,
} else {
netdev_stats_to_stats64(storage, >stats);
}
-   storage->rx_dropped += atomic_long_read(>rx_dropped);
-   storage->tx_dropped += atomic_long_read(>tx_dropped);
-   storage->rx_nohandler += atomic_long_read(>rx_nohandler);
+   storage->rx_dropped += (unsigned 
long)atomic_long_read(>rx_dropped);
+   storage->tx_dropped += (unsigned 
long)atomic_long_read(>tx_dropped);
+   storage->rx_nohandler += (unsigned 
long)atomic_long_read(>rx_nohandler);
return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);

[PATCH net-next 1/3] net: ethernet: ti: cpsw: move skb timestamp to packet_submit

2017-06-27 Thread Ivan Khoronzhuk

Move sw timestamp function close to channel submit function.

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b7a0f5e..422994e 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1236,6 +1236,7 @@ static inline int cpsw_tx_packet_submit(struct cpsw_priv 
*priv,
 {
struct cpsw_common *cpsw = priv->cpsw;
 
+   skb_tx_timestamp(skb);
return cpdma_chan_submit(txch, skb, skb->data, skb->len,
 priv->emac_port + cpsw->data.dual_emac);
 }
@@ -1611,8 +1612,6 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
cpts_is_tx_enabled(cpsw->cpts))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
-   skb_tx_timestamp(skb);
-
q_idx = skb_get_queue_mapping(skb);
if (q_idx >= cpsw->tx_ch_num)
q_idx = q_idx % cpsw->tx_ch_num;
-- 
2.7.4

[PATCH net-next 3/3] net: ethernet: ti: netcp_ethss: use cpts to check if packet needs timestamping

2017-06-27 Thread Ivan Khoronzhuk

There is a cpts function to check whether a packet can be timestamped with
cpts. It seems that ptp_classify_raw covers all cases listed with "case".

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/netcp_ethss.c | 18 +-
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/ti/netcp_ethss.c 
b/drivers/net/ethernet/ti/netcp_ethss.c
index 0847a8f..28cb38a 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2503,24 +2503,8 @@ static bool gbe_need_txtstamp(struct gbe_intf *gbe_intf,
  const struct netcp_packet *p_info)
 {
struct sk_buff *skb = p_info->skb;
-   unsigned int class = ptp_classify_raw(skb);
 
-   if (class == PTP_CLASS_NONE)
-   return false;
-
-   switch (class) {
-   case PTP_CLASS_V1_IPV4:
-   case PTP_CLASS_V1_IPV6:
-   case PTP_CLASS_V2_IPV4:
-   case PTP_CLASS_V2_IPV6:
-   case PTP_CLASS_V2_L2:
-   case (PTP_CLASS_V2_VLAN | PTP_CLASS_L2):
-   case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV4):
-   case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV6):
-   return true;
-   }
-
-   return false;
+   return cpts_can_timestamp(gbe_intf->gbe_dev->cpts, skb);
 }
 
 static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf,
-- 
2.7.4

[PATCH net-next 2/3] net: ethernet: ti: cpsw: fix sw timestamping for non PTP packets

2017-06-27 Thread Ivan Khoronzhuk

The cpts can timestamp only PTP packets at this moment, so the driver
cannot mark every packet as though it's going to be timestamped
only because h/w timestamping for the given skb is enabled with
SKBTX_HW_TSTAMP. Doing so prevents sw timestamping from being used: as a
result, an outgoing packet is not timestamped at all if it's not PTP and
h/w timestamping is enabled. So, fix it by setting SKBTX_IN_PROGRESS
only for PTP packets.

Signed-off-by: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c |  3 ++-
 drivers/net/ethernet/ti/cpts.h | 16 
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 422994e..1850e34 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1598,6 +1598,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
 {
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
+   struct cpts *cpts = cpsw->cpts;
struct netdev_queue *txq;
struct cpdma_chan *txch;
int ret, q_idx;
@@ -1609,7 +1610,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff 
*skb,
}
 
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-   cpts_is_tx_enabled(cpsw->cpts))
+   cpts_is_tx_enabled(cpts) && cpts_can_timestamp(cpts, skb))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
q_idx = skb_get_queue_mapping(skb);
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index c96eca2..01ea82b 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct cpsw_cpts {
@@ -155,6 +156,16 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
return !!cpts->tx_enable;
 }
 
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+   unsigned int class = ptp_classify_raw(skb);
+
+   if (class == PTP_CLASS_NONE)
+   return false;
+
+   return true;
+}
+
 #else
 struct cpts;
 
@@ -203,6 +214,11 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
 {
return false;
 }
+
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+   return false;
+}
 #endif
 
 
-- 
2.7.4

[PATCH net-next 0/3] fix sw timestamping for non PTP packets

2017-06-27 Thread Ivan Khoronzhuk

This series contains several corrections connected with timestamping
for cpsw and netcp drivers based on same cpts module.

Based on net/next

Ivan Khoronzhuk (3):
  net: ethernet: ti: cpsw: move skb timestamp to packet_submit
  net: ethernet: ti: cpsw: fix sw timestamping for non PTP packets
  net: ethernet: ti: netcp_ethss: use cpts to check if packet needs
timestamping

 drivers/net/ethernet/ti/cpsw.c|  6 +++---
 drivers/net/ethernet/ti/cpts.h| 16 
 drivers/net/ethernet/ti/netcp_ethss.c | 18 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

-- 
2.7.4

[net-next] net: remove policy-routing.txt documentation

2017-06-27 Thread Vincent Bernat

It dates back to 2.1.16 and has been obsolete since 2.1.68, when the
current rule system was introduced.

Signed-off-by: Vincent Bernat 
---
 Documentation/networking/policy-routing.txt | 150 
 1 file changed, 150 deletions(-)
 delete mode 100644 Documentation/networking/policy-routing.txt

diff --git a/Documentation/networking/policy-routing.txt 
b/Documentation/networking/policy-routing.txt
deleted file mode 100644
index 36f6936d7f21..
--- a/Documentation/networking/policy-routing.txt
+++ /dev/null
@@ -1,150 +0,0 @@
-Classes

-
-   "Class" is a complete routing table in common sense.
-   I.e. it is tree of nodes (destination prefix, tos, metric)
-   with attached information: gateway, device etc.
-   This tree is looked up as specified in RFC1812 5.2.4.3
-   1. Basic match
-   2. Longest match
-   3. Weak TOS.
-   4. Metric. (should not be in kernel space, but they are)
-   5. Additional pruning rules. (not in kernel space).
-   
-   We have two special type of nodes:
-   REJECT - abort route lookup and return an error value.
-   THROW  - abort route lookup in this class.
-
-
-   Currently the number of classes is limited to 255
-   (0 is reserved for "not specified class")
-
-   Three classes are builtin:
-
-   RT_CLASS_LOCAL=255 - local interface addresses,
-   broadcasts, nat addresses.
-
-   RT_CLASS_MAIN=254  - all normal routes are put there
-   by default.
-
-   RT_CLASS_DEFAULT=253 - if ip_fib_model==1, then
-   normal default routes are put there, if ip_fib_model==2
-   all gateway routes are put there.
-
-
-Rules
--
-   Rule is a record of (src prefix, src interface, tos, dst prefix)
-   with attached information.
-
-   Rule types:
-   RTP_ROUTE - lookup in attached class
-   RTP_NAT   - lookup in attached class and if a match is found,
-   translate packet source address.
-   RTP_MASQUERADE - lookup in attached class and if a match is found,
-   masquerade packet as sourced by us.
-   RTP_DROP   - silently drop the packet.
-   RTP_REJECT - drop the packet and send ICMP NET UNREACHABLE.
-   RTP_PROHIBIT - drop the packet and send ICMP COMM. ADM. PROHIBITED.
-
-   Rule flags:
-   RTRF_LOG - log route creations.
-   RTRF_VALVE - One way route (used with masquerading)
-
-Default setup:
-
-root@amber:/pub/ip-routing # iproute -r
-Kernel routing policy rules
-Pref Source DestinationTOS Iface   Cl
-   0 defaultdefault00  *   255
- 254 defaultdefault00  *   254
- 255 defaultdefault00  *   253
-
-
-Lookup algorithm
-
-
-   We scan rules list, and if a rule is matched, apply it.
-   If a route is found, return it.
-   If it is not found or a THROW node was matched, continue
-   to scan rules.
-
-Applications
-
-
-1. Just ignore classes. All the routes are put into MAIN class
-   (and/or into DEFAULT class).
-
-   HOWTO:  iproute add PREFIX [ tos TOS ] [ gw GW ] [ dev DEV ]
-   [ metric METRIC ] [ reject ] ... (look at iproute utility)
-
-   or use route utility from current net-tools.
-   
-2. Opposite case. Just forget all that you know about routing
-   tables. Every rule is supplied with its own gateway, device
-   info. record. This approach is not appropriate for automated
-   route maintenance, but it is ideal for manual configuration.
-
-   HOWTO:  iproute addrule [ from PREFIX ] [ to PREFIX ] [ tos TOS ]
-   [ dev INPUTDEV] [ pref PREFERENCE ] route [ gw GATEWAY ]
-   [ dev OUTDEV ] .
-
-   Warning: As of now the size of the routing table in this
-   approach is limited to 256. If someone likes this model, I'll
-   relax this limitation.
-
-3. OSPF classes (see RFC1583, RFC1812 E.3.3)
-   Very clean, stable and robust algorithm for OSPF routing
-   domains. Unfortunately, it is not widely used in the Internet.
-
-   Proposed setup:
-   255 local addresses
-   254 interface routes
-   253 ASE routes with external metric
-   252 ASE routes with internal metric
-   251 inter-area routes
-   250 intra-area routes for 1st area
-   249 intra-area routes for 2nd area
-   etc.
-   
-   Rules:
-   iproute addrule class 253
-   iproute addrule class 252
-   iproute addrule class 251
-   iproute addrule to a-prefix-for-1st-area class 250
-   iproute addrule to another-prefix-for-1st-area class 250
-   ...
-   iproute addrule to a-prefix-for-2nd-area class 249
-   ...
-
-   Area classes must be terminated with reject record.
-   iproute add default reject class 250
-   iproute add default reject class 249
-   ...
-

Re: [PATCH NET V5 2/2] net: hns: Use phy_driver to setup Phy loopback

2017-06-27 Thread Andrew Lunn

> >> -  phy_write(phy_dev, COPPER_CONTROL_REG, val);
> >> +  err = phy_resume(phy_dev);
> > 
> > Maybe this was discussed with an earlier version of these patches. Why
> > are using phy_resume() and phy_suspend()?
> When self_test is invoked with ETH_TEST_FL_OFFLINE option, hns mac driver
> call dev_close to set net dev to offline state if net dev is online.
> Doing the actual phy loolback test require phy is power up, So phy_resume
> and phy_suspend are used.

O.K, so you at least need some comments, because this is not obvious.

From your description, it sounds like you can call phy_resume() on a
device which is not suspended. In general, suspend is expected to
store away state which will be lost when powering down a
device. Resume writes that state back into the device after it is
powered up. So resuming a device which was never suspended could write
bad state into it.

Also, what about if WOL has been set before closing the device?

  Andrew

[PATCH v3 net-next 08/12] selftests/bpf: add a test to test_align

2017-06-27 Thread Edward Cree

New test adds 14 to the unknown value before adding to the packet pointer,
 meaning there's no 'fixed offset' field and instead we add into the
 var_off, yielding a '4n+2' value.

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_align.c | 67 
 1 file changed, 67 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_align.c 
b/tools/testing/selftests/bpf/test_align.c
index 031bba8..5165d8e 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -374,6 +374,73 @@ static struct bpf_align_test tests[] = {
{33, 
"R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
},
},
+   {
+   .descr = "packet variable offset 2",
+   .insns = {
+   /* Create an unknown offset, (4n+2)-aligned */
+   LOAD_UNKNOWN(BPF_REG_6),
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+   /* Add it to the packet pointer */
+   BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+   /* Check bounds and perform a read */
+   BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+   BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+   BPF_EXIT_INSN(),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+   /* Make a (4n) offset from the value we just read */
+   BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+   /* Add it to the packet pointer */
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+   /* Check bounds and perform a read */
+   BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+   BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+   BPF_EXIT_INSN(),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+   .matches = {
+   /* Calculated offset in R6 has unknown value, but known
+* alignment of 4.
+*/
+   {8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+   {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 
0x3fc))"},
+   /* Adding 14 makes R6 be (4n+2) */
+   {9, 
"R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+   /* Packet pointer has (4n+2) offset */
+   {11, 
"R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+   {13, 
"R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+   /* At the time the word size load is performed from R5,
+* its total fixed offset is NET_IP_ALIGN + reg->off (0)
+* which is 2.  Then the variable offset is (4n+2), so
+* the total offset is 4-byte aligned and meets the
+* load's requirements.
+*/
+   {15, 
"R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+   /* Newly read value in R6 was shifted left by 2, so has
+* known alignment of 4.
+*/
+   {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 
0x3fc))"},
+   /* Added (4n) to packet pointer's (4n+2) var_off, giving
+* another (4n+2).
+*/
+   {19, 
"R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+   {21, 
"R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+   /* At the time the word size load is performed from R5,
+* its total fixed offset is NET_IP_ALIGN + reg->off (0)
+* which is 2.  Then the variable offset is (4n+2), so
+* the total offset is 4-byte aligned and meets the
+* load's requirements.
+*/
+   {23, 
"R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+   },
+   },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)

[PATCH v3 net-next 11/12] selftests/bpf: add tests for subtraction & negative numbers

2017-06-27 Thread Edward Cree

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_align.c | 104 +++
 1 file changed, 104 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_align.c 
b/tools/testing/selftests/bpf/test_align.c
index dfd96c6..6bc2ceb 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -497,6 +497,110 @@ static struct bpf_align_test tests[] = {
{16, 
"R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2;
 0x7ffc))"},
}
},
+   {
+   .descr = "variable subtraction",
+   .insns = {
+   /* Create an unknown offset, (4n+2)-aligned */
+   LOAD_UNKNOWN(BPF_REG_6),
+   BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+   /* Create another unknown, (4n)-aligned, and subtract
+* it from the first one
+*/
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+   BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
+   /* Bounds-check the result */
+   BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
+   BPF_EXIT_INSN(),
+   /* Add it to the packet pointer */
+   BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+   /* Check bounds and perform a read */
+   BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+   BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+   BPF_EXIT_INSN(),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+   .matches = {
+   /* Calculated offset in R6 has unknown value, but known
+* alignment of 4.
+*/
+   {7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+   {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 
0x3fc))"},
+   /* Adding 14 makes R6 be (4n+2) */
+   {10, 
"R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+   /* New unknown value in R7 is (4n) */
+   {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 
0x3fc))"},
+   /* Subtracting it from R6 blows our unsigned bounds */
+   {12, 
"R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 
0xfffc))"},
+   /* Checked s>= 0 */
+   {14, 
"R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+   /* At the time the word size load is performed from R5,
+* its total fixed offset is NET_IP_ALIGN + reg->off (0)
+* which is 2.  Then the variable offset is (4n+2), so
+* the total offset is 4-byte aligned and meets the
+* load's requirements.
+*/
+   {20, 
"R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+   },
+   },
+   {
+   .descr = "pointer variable subtraction",
+   .insns = {
+   /* Create an unknown offset, (4n+2)-aligned and bounded
+* to [14,74]
+*/
+   LOAD_UNKNOWN(BPF_REG_6),
+   BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+   BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+   /* Subtract it from the packet pointer */
+   BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+   BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
+   /* Create another unknown, (4n)-aligned and >= 74.
+* That in fact means >= 76, since 74 % 4 == 2
+*/
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
+   /* Add it to the packet pointer */
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
+   /* Check bounds and perform a read */
+   BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+

[PATCH v3 net-next 12/12] selftests/bpf: variable offset negative tests

2017-06-27 Thread Edward Cree

Variable ctx accesses and stack accesses aren't allowed, because we can't
 determine what type of value will be read.

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_verifier.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index 7df3c34..471fbee 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5311,6 +5311,47 @@ static struct bpf_test tests[] = {
.errstr = "R0 min value is negative, either use unsigned index 
or do a if (index >=0) check.",
.result = REJECT,
},
+   {
+   "variable-offset ctx access",
+   .insns = {
+   /* Get an unknown value */
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+   /* Make it small and 4-byte aligned */
+   BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+   /* add it to skb.  We now have either >len or
+* >pkt_type, but we don't know which
+*/
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+   /* dereference it */
+   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+   BPF_EXIT_INSN(),
+   },
+   .errstr = "variable ctx access var_off=(0x0; 0x4)",
+   .result = REJECT,
+   .prog_type = BPF_PROG_TYPE_LWT_IN,
+   },
+   {
+   "variable-offset stack access",
+   .insns = {
+   /* Fill the top 8 bytes of the stack */
+   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+   /* Get an unknown value */
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+   /* Make it small and 4-byte aligned */
+   BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+   BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+   /* add it to fp.  We now have either fp-4 or fp-8, but
+* we don't know which
+*/
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+   /* dereference it */
+   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+   BPF_EXIT_INSN(),
+   },
+   .errstr = "variable stack access var_off=(0xfff8; 
0x4)",
+   .result = REJECT,
+   .prog_type = BPF_PROG_TYPE_LWT_IN,
+   },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)

[PATCH v3 net-next 09/12] selftests/bpf: add test for bogus operations on pointers

2017-06-27 Thread Edward Cree

Tests non-add/sub operations (AND, LSH) on pointers decaying them to
 unknown scalars.
Also tests that a pkt_ptr add which could potentially overflow is rejected
 (find_good_pkt_pointers ignores it and doesn't give us any reg->range).

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_align.c | 66 +++-
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_align.c 
b/tools/testing/selftests/bpf/test_align.c
index 5165d8e..dfd96c6 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -441,6 +441,62 @@ static struct bpf_align_test tests[] = {
{23, 
"R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
},
},
+   {
+   .descr = "dubious pointer arithmetic",
+   .insns = {
+   PREP_PKT_POINTERS,
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   /* ptr & const => unknown & const */
+   BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40),
+   /* ptr << const => unknown << const */
+   BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
+   /* We have a (4n) value.  Let's make a packet offset
+* out of it.  First add 14, to make it a (4n+2)
+*/
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+   /* Then make sure it's nonnegative */
+   BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
+   BPF_EXIT_INSN(),
+   /* Add it to packet pointer */
+   BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+   /* Check bounds and perform a read */
+   BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+   BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+   BPF_EXIT_INSN(),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+   .result = REJECT,
+   .matches = {
+   {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+   /* ptr & 0x40 == either 0 or 0x40 */
+   {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
+   /* ptr << 2 == unknown, (4n) */
+   {7, 
"R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0;
 0xfffc))"},
+   /* (4n) + 14 == (4n+2).  We blow our bounds, because
+* the add could overflow.
+*/
+   {8, "R5=inv(id=0,var_off=(0x2; 0xfffc))"},
+   /* Checked s>=0 */
+   {10, 
"R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 
0x7ffc))"},
+   /* packet pointer + nonnegative (4n+2) */
+   {12, 
"R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2;
 0x7ffc))"},
+   {14, 
"R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2;
 0x7ffc))"},
+   /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+* We checked the bounds, but it might have been able
+* to overflow if the packet pointer started in the
+* upper half of the address space.
+* So we did not get a 'range' on R6, and the access
+* attempt will fail.
+*/
+   {16, 
"R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2;
 0x7ffc))"},
+   }
+   },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -470,10 +526,15 @@ static int do_test_single(struct bpf_align_test *test)
fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
 prog, prog_len, 1, "GPL", 0,
 bpf_vlog, sizeof(bpf_vlog));
-   if (fd_prog < 0) {
+   if (fd_prog < 0 && test->result != REJECT) {
printf("Failed to load program.\n");
printf("%s", bpf_vlog);
ret = 1;
+   } else if (fd_prog >= 0 && test->result == REJECT) {
+   printf("Unexpected success to load!\n");
+   printf("%s",

[PATCH v3 net-next 10/12] selftests/bpf: don't try to access past MAX_PACKET_OFF in test_verifier

2017-06-27 Thread Edward Cree

"direct packet access: test2" was potentially reading four bytes from
 pkt + 0xffff, which could take it past the verifier's limit, causing
 the program to be rejected.
Increase the shifts by one so that R2 is now mask 0x7fff instead of
 mask 0xffff.

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index 210a031..7df3c34 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2330,8 +2330,8 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, data)),
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
-   BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 48),
-   BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 48),
+   BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
+   BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),

[PATCH v3 net-next 07/12] selftests/bpf: rewrite test_align

2017-06-27 Thread Edward Cree

Expectations have changed, as has the format of the logged state.
To make the tests easier to read, add a line-matching framework so that
 each match need only quote the register it cares about.  (Multiple
 matches may refer to the same line, but matches must be listed in
 order of increasing line.)

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_align.c | 225 ++-
 1 file changed, 132 insertions(+), 93 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_align.c 
b/tools/testing/selftests/bpf/test_align.c
index bccebd9..031bba8 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -27,6 +27,11 @@
 #define MAX_INSNS  512
 #define MAX_MATCHES16
 
+struct bpf_reg_match {
+   unsigned int line;
+   const char *match;
+};
+
 struct bpf_align_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
@@ -36,10 +41,14 @@ struct bpf_align_test {
REJECT
} result;
enum bpf_prog_type prog_type;
-   const char *matches[MAX_MATCHES];
+   /* Matches must be in order of increasing line */
+   struct bpf_reg_match matches[MAX_MATCHES];
 };
 
 static struct bpf_align_test tests[] = {
+   /* Four tests of known constants.  These aren't staggeringly
+* interesting since we track exact values now.
+*/
{
.descr = "mov",
.insns = {
@@ -53,11 +62,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
-   "1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 
R10=fp",
-   "2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 
R10=fp",
-   "3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 
R10=fp",
-   "4: R1=ctx 
R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
-   "5: R1=ctx 
R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
+   {1, "R1=ctx(id=0,off=0,imm=0)"},
+   {1, "R10=fp0"},
+   {1, "R3=inv2"},
+   {2, "R3=inv4"},
+   {3, "R3=inv8"},
+   {4, "R3=inv16"},
+   {5, "R3=inv32"},
},
},
{
@@ -79,17 +90,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
-   "1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 
R10=fp",
-   "2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 
R10=fp",
-   "3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 
R10=fp",
-   "4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 
R10=fp",
-   "5: R1=ctx 
R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
-   "6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 
R10=fp",
-   "7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 
R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
-   "8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 
R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
-   "9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 
R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
-   "10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 
R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
-   "11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 
R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+   {1, "R1=ctx(id=0,off=0,imm=0)"},
+   {1, "R10=fp0"},
+   {1, "R3=inv1"},
+   {2, "R3=inv2"},
+   {3, "R3=inv4"},
+   {4, "R3=inv8"},
+   {5, "R3=inv16"},
+   {6, "R3=inv1"},
+   {7, "R4=inv32"},
+   {8, "R4=inv16"},
+   {9, "R4=inv8"},
+   {10, "R4=inv4"},
+   {11, "R4=inv2"},
},
},
{
@@ -106,12 +119,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
-   "1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 
R10=fp",
-   "2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 
R10=fp",
-   "3: R1=ctx 
R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp",
-   "4: R1=ctx 
R3=imm10,min_value=10,max_value=10,min_align=2 
R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
-

[PATCH v3 net-next 06/12] selftests/bpf: change test_verifier expectations

2017-06-27 Thread Edward Cree

Some of the verifier's error messages have changed, and some constructs
 that previously couldn't be verified are now accepted.

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_verifier.c | 226 ++--
 1 file changed, 116 insertions(+), 110 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index fceed67..210a031 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -421,7 +421,7 @@ static struct bpf_test tests[] = {
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
-   .errstr_unpriv = "R1 pointer arithmetic",
+   .errstr_unpriv = "R1 subtraction from stack pointer",
.result_unpriv = REJECT,
.errstr = "R1 invalid mem access",
.result = REJECT,
@@ -603,8 +603,9 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
BPF_EXIT_INSN(),
},
-   .errstr = "misaligned access",
+   .errstr = "misaligned stack access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"invalid map_fd for function call",
@@ -650,8 +651,9 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
-   .errstr = "misaligned access",
+   .errstr = "misaligned value access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"sometimes access memory with incorrect alignment",
@@ -672,6 +674,7 @@ static struct bpf_test tests[] = {
.errstr = "R0 invalid mem access",
.errstr_unpriv = "R0 leaks addr",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"jump test 1",
@@ -1215,8 +1218,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[0]) + 1),
BPF_EXIT_INSN(),
},
-   .errstr = "misaligned access",
+   .errstr = "misaligned context access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check __sk_buff->hash, offset 0, half store not permitted",
@@ -1319,8 +1323,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[0]) + 2),
BPF_EXIT_INSN(),
},
-   .errstr = "misaligned access",
+   .errstr = "misaligned context access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: word, unaligned 2",
@@ -1330,8 +1335,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[4]) + 1),
BPF_EXIT_INSN(),
},
-   .errstr = "misaligned access",
+   .errstr = "misaligned context access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: word, unaligned 3",
@@ -1341,8 +1347,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[4]) + 2),
BPF_EXIT_INSN(),
},
-   .errstr = "misaligned access",
+   .errstr = "misaligned context access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: word, unaligned 4",
@@ -1352,8 +1359,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[4]) + 3),
BPF_EXIT_INSN(),
},
-   .errstr = "misaligned access",
+   .errstr = "misaligned context access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: double",
@@ -1379,8 +1387,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[1])),
BPF_EXIT_INSN(),
},
-   .errstr = "misaligned access",
+   .errstr = "misaligned context access",
.result = REJECT,
+   .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: double, unaligned 2",
@@ -1390,8 +1399,9 @@ static struct bpf_test tests[] = {

[PATCH v3 net-next 05/12] bpf/verifier: more concise register state logs for constant var_off

2017-06-27 Thread Edward Cree

Signed-off-by: Edward Cree 
---
 kernel/bpf/verifier.c | 46 +++---
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d45c1d1..3e1df75 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -234,25 +234,33 @@ static void print_verifier_state(struct 
bpf_verifier_state *state)
verbose(",ks=%d,vs=%d",
reg->map_ptr->key_size,
reg->map_ptr->value_size);
-   if (reg->smin_value != reg->umin_value &&
-   reg->smin_value != S64_MIN)
-   verbose(",smin_value=%lld",
-   (long long)reg->smin_value);
-   if (reg->smax_value != reg->umax_value &&
-   reg->smax_value != S64_MAX)
-   verbose(",smax_value=%lld",
-   (long long)reg->smax_value);
-   if (reg->umin_value != 0)
-   verbose(",umin_value=%llu",
-   (unsigned long long)reg->umin_value);
-   if (reg->umax_value != U64_MAX)
-   verbose(",umax_value=%llu",
-   (unsigned long long)reg->umax_value);
-   if (!tnum_is_unknown(reg->var_off)) {
-   char tn_buf[48];
-
-   tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-   verbose(",var_off=%s", tn_buf);
+   if (tnum_is_const(reg->var_off)) {
+   /* Typically an immediate SCALAR_VALUE, but
+* could be a pointer whose offset is too big
+* for reg->off
+*/
+   verbose(",imm=%llx", reg->var_off.value);
+   } else {
+   if (reg->smin_value != reg->umin_value &&
+   reg->smin_value != S64_MIN)
+   verbose(",smin_value=%lld",
+   (long long)reg->smin_value);
+   if (reg->smax_value != reg->umax_value &&
+   reg->smax_value != S64_MAX)
+   verbose(",smax_value=%lld",
+   (long long)reg->smax_value);
+   if (reg->umin_value != 0)
+   verbose(",umin_value=%llu",
+   (unsigned long 
long)reg->umin_value);
+   if (reg->umax_value != U64_MAX)
+   verbose(",umax_value=%llu",
+   (unsigned long 
long)reg->umax_value);
+   if (!tnum_is_unknown(reg->var_off)) {
+   char tn_buf[48];
+
+   tnum_strn(tn_buf, sizeof(tn_buf), 
reg->var_off);
+   verbose(",var_off=%s", tn_buf);
+   }
}
verbose(")");
}

[PATCH v3 net-next 04/12] bpf/verifier: track signed and unsigned min/max values

2017-06-27 Thread Edward Cree

Allows us to, sometimes, combine information from a signed check of one
 bound and an unsigned check of the other.
We now track the full range of possible values, rather than restricting
 ourselves to [0, 1<<30) and considering anything beyond that as
 unknown.  While this is probably not necessary, it makes the code more
 straightforward and symmetrical between signed and unsigned bounds.

Signed-off-by: Edward Cree 
---
 include/linux/bpf_verifier.h |  22 +-
 include/linux/tnum.h |   2 +
 kernel/bpf/tnum.c|  16 +
 kernel/bpf/verifier.c| 727 ++-
 4 files changed, 471 insertions(+), 296 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ca7e2ce..84c6576 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -11,11 +11,15 @@
 #include <linux/filter.h> /* for MAX_BPF_STACK */
 #include <linux/tnum.h>
 
- /* Just some arbitrary values so we can safely do math without overflowing and
-  * are obviously wrong for any sort of memory access.
-  */
-#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024)
-#define BPF_REGISTER_MIN_RANGE -1
+/* Maximum variable offset umax_value permitted when resolving memory accesses.
+ * In practice this is far bigger than any realistic pointer offset; this limit
+ * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
+ */
+#define BPF_MAX_VAR_OFF (1ULL << 31)
+/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
+ * that converting umax_value to int cannot overflow.
+ */
+#define BPF_MAX_VAR_SIZ INT_MAX
 
 struct bpf_reg_state {
enum bpf_reg_type type;
@@ -38,7 +42,7 @@ struct bpf_reg_state {
 * PTR_TO_MAP_VALUE_OR_NULL, we have to NULL-check it _first_.
 */
u32 id;
-   /* These three fields must be last.  See states_equal() */
+   /* These five fields must be last.  See states_equal() */
/* For scalar types (SCALAR_VALUE), this represents our knowledge of
 * the actual value.
 * For pointer types, this represents the variable part of the offset
@@ -51,8 +55,10 @@ struct bpf_reg_state {
 * These refer to the same value as var_off, not necessarily the actual
 * contents of the register.
 */
-   s64 min_value; /* minimum possible (s64)value */
-   u64 max_value; /* maximum possible (u64)value */
+   s64 smin_value; /* minimum possible (s64)value */
+   s64 smax_value; /* maximum possible (s64)value */
+   u64 umin_value; /* minimum possible (u64)value */
+   u64 umax_value; /* maximum possible (u64)value */
 };
 
 enum bpf_stack_slot_type {
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index a0b07bf..0d2d3da 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -17,6 +17,8 @@ struct tnum {
 struct tnum tnum_const(u64 value);
 /* A completely unknown value */
 extern const struct tnum tnum_unknown;
+/* A value that's unknown except that @min <= value <= @max */
+struct tnum tnum_range(u64 min, u64 max);
 
 /* Arithmetic and logical ops */
 /* Shift a tnum left (by a fixed shift) */
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 92eeeb1..1f4bf68 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -17,6 +17,22 @@ struct tnum tnum_const(u64 value)
return TNUM(value, 0);
 }
 
+struct tnum tnum_range(u64 min, u64 max)
+{
+   u64 chi = min ^ max, delta;
+   u8 bits = fls64(chi);
+
+   /* special case, needed because 1ULL << 64 is undefined */
+   if (bits > 63)
+   return tnum_unknown;
+   /* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7.
+* if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return
+*  constant min (since min == max).
+*/
+   delta = (1ULL << bits) - 1;
+   return TNUM(min & ~delta, delta);
+}
+
 struct tnum tnum_lshift(struct tnum a, u8 shift)
 {
return TNUM(a.value << shift, a.mask << shift);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 82823f1..d45c1d1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -234,12 +234,20 @@ static void print_verifier_state(struct 
bpf_verifier_state *state)
verbose(",ks=%d,vs=%d",
reg->map_ptr->key_size,
reg->map_ptr->value_size);
-   if (reg->min_value != BPF_REGISTER_MIN_RANGE)
-   verbose(",min_value=%lld",
-   (long long)reg->min_value);
-   if (reg->max_value != BPF_REGISTER_MAX_RANGE)
-   verbose(",max_value=%llu",
-   (unsigned long long)reg->max_value);
+   if (reg->smin_value != reg->umin_value &&
+   reg->smin_value != S64_MIN)
+

[PATCH v3 net-next 03/12] nfp: change bpf verifier hooks to match new verifier data structures

2017-06-27 Thread Edward Cree

Signed-off-by: Edward Cree 
---
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 24 +--
 kernel/bpf/tnum.c |  1 +
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c 
b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index d696ba4..5b783a9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -79,28 +79,32 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
   const struct bpf_verifier_env *env)
 {
const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+   u64 imm;
 
if (nfp_prog->act == NN_ACT_XDP)
return 0;
 
-   if (reg0->type != CONST_IMM) {
-   pr_info("unsupported exit state: %d, imm: %llx\n",
-   reg0->type, reg0->imm);
+   if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) {
+   char tn_buf[48];
+
+   tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off);
+   pr_info("unsupported exit state: %d, var_off: %s\n",
+   reg0->type, tn_buf);
return -EINVAL;
}
 
-   if (nfp_prog->act != NN_ACT_DIRECT &&
-   reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+   imm = reg0->var_off.value;
+   if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) {
pr_info("unsupported exit state: %d, imm: %llx\n",
-   reg0->type, reg0->imm);
+   reg0->type, imm);
return -EINVAL;
}
 
-   if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
-   reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
-   reg0->imm != TC_ACT_QUEUED) {
+   if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT &&
+   imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN &&
+   imm != TC_ACT_QUEUED) {
pr_info("unsupported exit state: %d, imm: %llx\n",
-   reg0->type, reg0->imm);
+   reg0->type, imm);
return -EINVAL;
}
 
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 803bd0d..92eeeb1 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -141,6 +141,7 @@ int tnum_strn(char *str, size_t size, struct tnum a)
 {
return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
 }
+EXPORT_SYMBOL_GPL(tnum_strn);
 
 int tnum_sbin(char *str, size_t size, struct tnum a)
 {

[PATCH v3 net-next 02/12] bpf/verifier: rework value tracking

2017-06-27 Thread Edward Cree

Tracks value alignment by means of tracking known & unknown bits.
Tightens some min/max value checks and fixes a couple of bugs therein.
If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES,
 treat the pointer as an unknown scalar and try again, because we might be
 able to conclude something about the result (e.g. pointer & 0x40 is either
 0 or 0x40).

Signed-off-by: Edward Cree 
---
 include/linux/bpf.h  |   34 +-
 include/linux/bpf_verifier.h |   40 +-
 include/linux/tnum.h |   79 ++
 kernel/bpf/Makefile  |2 +-
 kernel/bpf/tnum.c|  163 
 kernel/bpf/verifier.c| 1692 --
 6 files changed, 1235 insertions(+), 775 deletions(-)
 create mode 100644 include/linux/tnum.h
 create mode 100644 kernel/bpf/tnum.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index deca4e7..0fc3bbc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -116,35 +116,25 @@ enum bpf_access_type {
 };
 
 /* types of values stored in eBPF registers */
+/* Pointer types represent:
+ * pointer
+ * pointer + imm
+ * pointer + (u16) var
+ * pointer + (u16) var + imm
+ * if (range > 0) then [ptr, ptr + range - off) is safe to access
+ * if (id > 0) means that some 'var' was added
+ * if (off > 0) means that 'imm' was added
+ */
 enum bpf_reg_type {
NOT_INIT = 0,/* nothing was written into register */
-   UNKNOWN_VALUE,   /* reg doesn't contain a valid pointer */
+   SCALAR_VALUE,/* reg doesn't contain a valid pointer */
PTR_TO_CTX,  /* reg points to bpf_context */
CONST_PTR_TO_MAP,/* reg points to struct bpf_map */
PTR_TO_MAP_VALUE,/* reg points to map element value */
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
-   FRAME_PTR,   /* reg == frame_pointer */
-   PTR_TO_STACK,/* reg == frame_pointer + imm */
-   CONST_IMM,   /* constant integer value */
-
-   /* PTR_TO_PACKET represents:
-* skb->data
-* skb->data + imm
-* skb->data + (u16) var
-* skb->data + (u16) var + imm
-* if (range > 0) then [ptr, ptr + range - off) is safe to access
-* if (id > 0) means that some 'var' was added
-* if (off > 0) menas that 'imm' was added
-*/
-   PTR_TO_PACKET,
+   PTR_TO_STACK,/* reg == frame_pointer + offset */
+   PTR_TO_PACKET,   /* reg points to skb->data */
PTR_TO_PACKET_END,   /* skb->data + headlen */
-
-   /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map
-* elem value.  We only allow this if we can statically verify that
-* access from this register are going to fall within the size of the
-* map element.
-*/
-   PTR_TO_MAP_VALUE_ADJ,
 };
 
 struct bpf_prog;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 621076f..ca7e2ce 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -9,6 +9,7 @@
 
 #include <linux/bpf.h> /* for enum bpf_reg_type */
 #include <linux/filter.h> /* for MAX_BPF_STACK */
+#include <linux/tnum.h>
 
  /* Just some arbitrary values so we can safely do math without overflowing and
   * are obviously wrong for any sort of memory access.
@@ -19,30 +20,39 @@
 struct bpf_reg_state {
enum bpf_reg_type type;
union {
-   /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE 
*/
-   s64 imm;
-
-   /* valid when type == PTR_TO_PACKET* */
-   struct {
-   u16 off;
-   u16 range;
-   };
+   /* valid when type == PTR_TO_PACKET */
+   u32 range;
 
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
 *   PTR_TO_MAP_VALUE_OR_NULL
 */
struct bpf_map *map_ptr;
};
+   /* Fixed part of pointer offset, pointer types only */
+   s32 off;
+   /* Used to find other pointers with the same variable offset, so they
+* can share range knowledge.
+* Exception: for PTR_TO_MAP_VALUE_OR_NULL this is used to share which
+* map value we came from, when one is tested for != NULL.  Note that
+* this overloading means that we can't do pointer arithmetic on a
+* PTR_TO_MAP_VALUE_OR_NULL, we have to NULL-check it _first_.
+*/
u32 id;
+   /* These three fields must be last.  See states_equal() */
+   /* For scalar types (SCALAR_VALUE), this represents our knowledge of
+* the actual value.
+* For pointer types, this represents the variable part of the offset
+* from the pointed-to object, and is shared with all bpf_reg_states
+* with the same id as us.
+*/
+   struct tnum var_off;
/* Used to determine if any memory

[PATCH v3 net-next 01/12] selftests/bpf: add test for mixed signed and unsigned bounds checks

2017-06-27 Thread Edward Cree

Currently fails due to bug in verifier bounds handling.

Signed-off-by: Edward Cree 
---
 tools/testing/selftests/bpf/test_verifier.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index c0af019..fceed67 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5279,6 +5279,32 @@ static struct bpf_test tests[] = {
.errstr = "invalid bpf_context access",
.prog_type = BPF_PROG_TYPE_LWT_IN,
},
+   {
+   "bounds checks mixing signed and unsigned",
+   .insns = {
+   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+   BPF_LD_MAP_FD(BPF_REG_1, 0),
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+BPF_FUNC_map_lookup_elem),
+   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+   BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+   BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+   BPF_MOV64_IMM(BPF_REG_2, -1),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+   BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+   BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+   BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .fixup_map1 = { 3 },
+   .errstr_unpriv = "R0 pointer arithmetic prohibited",
+   .errstr = "R0 min value is negative, either use unsigned index 
or do a if (index >=0) check.",
+   .result = REJECT,
+   .result_unpriv = REJECT,
+   },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)

[PATCH v3 net-next 00/12] bpf: rewrite value tracking in verifier

2017-06-27 Thread Edward Cree

This series simplifies alignment tracking, generalises bounds tracking and
 fixes some bounds-tracking bugs in the BPF verifier.  Pointer arithmetic on
 packet pointers, stack pointers, map value pointers and context pointers has
 been unified, and bounds on these pointers are only checked when the pointer
 is dereferenced.
Operations on pointers which destroy all relation to the original pointer
 (such as multiplies and shifts) are disallowed if !env->allow_ptr_leaks,
 otherwise they convert the pointer to an unknown scalar and feed it to the
 normal scalar arithmetic handling.
Pointer types have been unified with the corresponding adjusted-pointer types
 where those existed (e.g. PTR_TO_MAP_VALUE[_ADJ] or FRAME_PTR vs
 PTR_TO_STACK); similarly, CONST_IMM and UNKNOWN_VALUE have been unified into
 SCALAR_VALUE.
Pointer types (except CONST_PTR_TO_MAP, PTR_TO_MAP_VALUE_OR_NULL and
 PTR_TO_PACKET_END, which do not allow arithmetic) have a 'fixed offset' and
 a 'variable offset'; the former is used when e.g. adding an immediate or a
 known-constant register, as long as it does not overflow.  Otherwise the
 latter is used, and any operation creating a new variable offset creates a
 new 'id' (and, for PTR_TO_PACKET, clears the 'range').
SCALAR_VALUEs use the 'variable offset' fields to track the range of possible
 values; the 'fixed offset' should never be set on a scalar.

As of patch 12/12, all tests of tools/testing/selftests/bpf/test_verifier
 and tools/testing/selftests/bpf/test_align pass.

v3: added a few more tests; removed RFC tags.

v2: fixed nfp build, made test_align pass again and extended it with a few
 new tests (though still need to add more).

Edward Cree (12):
  selftests/bpf: add test for mixed signed and unsigned bounds checks
  bpf/verifier: rework value tracking
  nfp: change bpf verifier hooks to match new verifier data structures
  bpf/verifier: track signed and unsigned min/max values
  bpf/verifier: more concise register state logs for constant var_off
  selftests/bpf: change test_verifier expectations
  selftests/bpf: rewrite test_align
  selftests/bpf: add a test to test_align
  selftests/bpf: add test for bogus operations on pointers
  selftests/bpf: don't try to access past MAX_PACKET_OFF in
test_verifier
  selftests/bpf: add tests for subtraction & negative numbers
  selftests/bpf: variable offset negative tests

 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |   24 +-
 include/linux/bpf.h   |   34 +-
 include/linux/bpf_verifier.h  |   56 +-
 include/linux/tnum.h  |   81 +
 kernel/bpf/Makefile   |2 +-
 kernel/bpf/tnum.c |  180 ++
 kernel/bpf/verifier.c | 1943 -
 tools/testing/selftests/bpf/test_align.c  |  462 -
 tools/testing/selftests/bpf/test_verifier.c   |  293 ++--
 9 files changed, 2034 insertions(+), 1041 deletions(-)
 create mode 100644 include/linux/tnum.h
 create mode 100644 kernel/bpf/tnum.c

[PATCH net-next] vxlan: fix incorrect nlattr access in MTU check

2017-06-27 Thread Matthias Schiffer

The access to the wrong variable could lead to a NULL dereference and
possibly other invalid memory reads in vxlan newlink/changelink requests
with a IFLA_MTU attribute.

Fixes: a985343ba906 "vxlan: refactor verification and application of 
configuration"
Signed-off-by: Matthias Schiffer 
---
 drivers/net/vxlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0dafd8e6c665..fd0ff97e3d81 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2727,7 +2727,7 @@ static int vxlan_validate(struct nlattr *tb[], struct 
nlattr *data[],
}
 
if (tb[IFLA_MTU]) {
-   u32 mtu = nla_get_u32(data[IFLA_MTU]);
+   u32 mtu = nla_get_u32(tb[IFLA_MTU]);
 
if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU)
return -EINVAL;
-- 
2.13.2

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Corentin Labbe

On Tue, Jun 27, 2017 at 11:33:56AM +0100, Andre Przywara wrote:
> Hi,
> 
> On 27/06/17 11:23, Icenowy Zheng wrote:
> > 
> > 
> > 于 2017年6月27日 GMT+08:00 下午6:15:58, Andre Przywara  
> > 写到:
> >> Hi,
> >>
> >> On 27/06/17 10:41, Maxime Ripard wrote:
> >>> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
>  Hi,
> 
>  (CC:ing some people from that Rockchip dmwac series)
> 
>  On 27/06/17 09:21, Corentin Labbe wrote:
> > On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
> >> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
> >>  wrote:
> >>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>  On 31/05/17 08:18, Corentin Labbe wrote:
> > > The dwmac-sun8i is a heavily hacked version of stmmac hardware by
> > allwinner.
> > In fact the only common part is the descriptor management and
> >> the first
> > register function.
> 
>  Hi,
> 
>  I know I am a bit late with this, but while adapting the U-Boot
> >> driver
>  to the new binding I was wondering about the internal PHY
> >> detection:
> 
> 
>  So here you seem to deduce the usage of the internal PHY by the
> >> PHY
>  interface specified in the DT (MII = internal, RGMII =
> >> external).
>  I think I raised this question before, but isn't it perfectly
> >> legal for
>  a board to use MII with an external PHY even on those SoCs that
> >> feature
>  an internal PHY?
>  On the first glance that does not make too much sense, but apart
> >> from
>  not being the correct binding to describe all of the SoCs
> >> features I see
>  two scenarios:
>  1) A board vendor might choose to not use the internal PHY
> >> because it
>  has bugs, lacks features (configurability) or has other issues.
> >> For
>  instance I have heard reports that the internal PHY makes the
> >> SoC go
>  rather hot, possibly limiting the CPU frequency. By using an
> >> external
>  MII PHY (which are still cheaper than RGMII PHYs) this can be
> >> avoided.
>  2) A PHY does not necessarily need to be directly connected to
>  magnetics. Indeed quite some boards use (RG)MII to connect to a
> >> switch
>  IC or some other network circuitry, for instance fibre
> >> connectors.
> 
>  So I was wondering if we would need an explicit:
>    allwinner,use-internal-phy;
>  boolean DT property to signal the usage of the internal PHY?
>  Alternatively we could go with the negative version:
>    allwinner,disable-internal-phy;
> 
>  Or what about introducing a new "allwinner,internal-mii-phy"
> >> compatible
>  string for the *PHY* node and use that?
> 
>  I just want to avoid that we introduce a binding that causes us
>  headaches later. I think we can still fix this with a followup
> >> patch
>  before the driver and its binding hit a release kernel.
> 
>  Cheers,
>  Andre.
> 
> >>>
> >>> I just see some patch, where "phy-mode = internal" is valid.
> >>> I will try to find a way to use it
> >>
> >> Can you provide a link?
> >
> > https://lkml.org/lkml/2017/6/23/479
> >
> >>
> >> I'm not a fan of using phy-mode for this. There's no guarantee
> >> what
> >> mode the internal PHY uses. That's what phy-mode is for.
> 
>  I can understand Chen-Yu's concerns, but ...
> 
> > > For each soc the internal PHY mode is known and set in
> >> emac_variant/internal_phy
> > So its not a problem.
> 
>  that is true as well, at least for now.
> 
>  So while I agree that having a separate property to indicate the
> >> usage
>  of the internal PHY would be nice, I am bit tempted to use this
> >> easier
>  approach and piggy back on the existing phy-mode property.
> >>>
> >>> We're trying to fix an issue that works for now too.
> >>>
> >>> If we want to consider future weird cases, then we must consider all
> >>> of them. And the phy mode changing is definitely not really far
> >>> fetched.
> >>>
> >>> I agree with Chen-Yu, and I really feel like the compatible solution
> >>> you suggested would cover both your concerns, and ours.
> >>
> >> So something like this?
> >>emac: emac@1c3 {
> >>compatible = "allwinner,sun8i-h3-emac";
> >>...
> >>phy-mode = "mii";
> >>phy-handle = <&int_mii_phy>;
> >>...
> >>
> >>mdio: mdio {
> >>#address-cells = <1>;
> >>#size-cells = <0>;
> >>int_mii_phy: ethernet-phy@1 {
> >>compatible = "allwinner,sun8i-h3-ephy";
> >>syscon = <&syscon>;
> > 
> > The MAC still needs to set some bits of syscon register.

Re: [Qemu-devel] BUG: KASAN: use-after-free in free_old_xmit_skbs

2017-06-27 Thread Jean-Philippe Menil


On 06/27/2017 04:13 AM, Jason Wang wrote:



On 2017年06月26日 15:35, Jean-Philippe Menil wrote:

On 06/26/2017 04:50 AM, Jason Wang wrote:



On 2017年06月24日 06:32, Cong Wang wrote:
On Fri, Jun 23, 2017 at 1:43 AM, Jason Wang  
wrote:


On 2017年06月23日 02:53, Michael S. Tsirkin wrote:

On Thu, Jun 22, 2017 at 08:15:58AM +0200, jean-philippe menil wrote:

Hi Michael,

from what i see, the race appear when we hit virtnet_reset in
virtnet_xdp_set.
virtnet_reset
_remove_vq_common
  virtnet_del_vqs
virtnet_free_queues
  kfree(vi->sq)
when the xdp program (with two instances of the program to 
trigger it

faster)
is added or removed.

It's easily repeatable, with 2 cpus and 4 queues on the qemu command
line,
running the xdp_ttl tool from Jesper.

For now, i'm able to continue my qualification, testing if xdp_qp 
is not

null,
but do not seem to be a sustainable trick.
if (xdp_qp && vi->xdp_queues_pairs != xdp_qp)

Maybe it will be clearer to you with this information.

Best regards.

Jean-Philippe


I'm pretty clear about the issue here, I was trying to figure out 
a fix.

Jason, any thoughts?



Hi Jean:

Does the following fix this issue? (I can't reproduce it locally 
through

xdp_ttl)

It is tricky here.

 From my understanding of the code base, the tx_lock is not sufficient
here, because in virtnet_del_vqs() all vqs are deleted and one vp
maps to one txq.

I am afraid you have to add a spinlock somewhere to serialized
free_old_xmit_skbs() vs. vring_del_virtqueue(). As you can see
they are in different layers, so it is hard to figure out where to add
it...

Also, make sure we don't sleep inside the spinlock, I see a
synchronize_net().


Looks like I miss something. I thought free_old_xmit_skbs() were 
serialized in this case since we disable all tx queues after 
netif_tx_unlock_bh()?


Jean:

I thought this could be easily reproduced by e.g produce some traffic 
and in the same time try to attach an xdp program. But looks not. How 
do you trigger this? What's your qemu command line for this?


Thanks


Hi Jason,

this is how i trigger the bug:
- on the guest, tcpdump on the interface
- on the guest, run iperf against the host
- on the guest, cat /sys/kernel/debug/tracing/trace_pipe
- on the guest, run one or two instances of xdp_ttl compiled with 
DEBUG uncommented, that i start stop, until i trigger the bug.


qemu command line is as follow:

qemu-system-x86_64 -name ubuntu --enable-kvm -machine pc,accel=kvm 
-smp 2 -drive file=/dev/LocalDisk/ubuntu,if=virtio,format=raw -m 2048 
-rtc base=localtime,clock=host -usbdevice tablet --balloon virtio 
-netdev 
tap,id=ubuntu-0,ifname=ubuntu-0,script=/home/jenfi/WORK/jp/qemu/if-up,downscript=/home/jenfi/WORK/jp/qemu/if-down,vhost=on,queues=4 
-device 
virtio-net-pci,netdev=ubuntu-0,mac=de:ad:be:ef:01:03,mq=on,guest_tso4=off,guest_tso6=off,guest_ecn=off,guest_ufo=off,vectors=2 
-vnc 127.0.0.1:3 -nographic -serial 
file:/home/jenfi/WORK/jp/qemu/ubuntu.out -monitor 
unix:/home/jenfi/WORK/jp/qemu/ubuntu.sock,server,nowait


Notice, the smp 2, queues to 4 and vectors to 2.
It seems that I forgot to mention that at the beginning of this thread, 
sorry for that.


Best regards.

Jean-Philippe



Thanks Jean, I manage to reproduce the issue.

I thought netif_tx_unlock_bh() will do tx lock but looks not, that's why 
previous patch doesn't work.


Could you please test this patch? (At least it can't trigger the warning 
after more than 20 times of xdp start/stop).


diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1f8c15c..a18f859 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1802,6 +1802,7 @@ static void virtnet_freeze_down(struct 
virtio_device *vdev)

 flush_work(&vi->config_work);

 netif_device_detach(vi->dev);
+   netif_tx_disable(vi->dev);
 cancel_delayed_work_sync(&vi->refill);

 if (netif_running(vi->dev)) {




Hi Jason,

Seem to do the trick !
with your patch, i'm unable to repeat the problem anymore (running more 
than 2h without any issue).


Best regards.

Jean-Philippe

RE: [PATCH NET V6 1/2] net: phy: Add phy loopback support in net phy framework

2017-06-27 Thread Madalin-cristian Bucur

> -Original Message-
> From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org]
> On Behalf Of Lin Yun Sheng
> Sent: Tuesday, June 27, 2017 2:01 PM
> To: da...@davemloft.net; and...@lunn.ch; f.faine...@gmail.com
> Cc: huangda...@hisilicon.com; xuw...@hisilicon.com;
> liguo...@hisilicon.com; yisen.zhu...@huawei.com;
> gabriele.paol...@huawei.com; john.ga...@huawei.com; linux...@huawei.com;
> yisen.zhu...@huawei.com; salil.me...@huawei.com; lipeng...@huawei.com;
> trem...@gmail.com; netdev@vger.kernel.org; linux-ker...@vger.kernel.org
> Subject: [PATCH NET V6 1/2] net: phy: Add phy loopback support in net phy
> framework
> 
> This patch adds set_loopback in phy_driver, which is used by Mac
> driver to enable or disable a phy. It also adds a generic
> genphy_loopback function, which uses BMCR loopback bit to enable
> or disable a phy.

"disable a phy" or disable the PHY loopback function?

> 
> Signed-off-by: Lin Yun Sheng 
> ---
>  drivers/net/phy/marvell.c|  1 +
>  drivers/net/phy/phy_device.c | 51
> 
>  include/linux/phy.h  |  5 +
>  3 files changed, 57 insertions(+)
> 
> diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
> index 57297ba..01a1586 100644
> --- a/drivers/net/phy/marvell.c
> +++ b/drivers/net/phy/marvell.c
> @@ -2094,6 +2094,7 @@ static int m88e1510_probe(struct phy_device *phydev)
>   .get_sset_count = marvell_get_sset_count,
>   .get_strings = marvell_get_strings,
>   .get_stats = marvell_get_stats,
> + .set_loopback = genphy_loopback,
>   },
>   {
>   .phy_id = MARVELL_PHY_ID_88E1540,
> diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
> index 1219eea..1e08d62 100644
> --- a/drivers/net/phy/phy_device.c
> +++ b/drivers/net/phy/phy_device.c
> @@ -1123,6 +1123,39 @@ int phy_resume(struct phy_device *phydev)
>  }
>  EXPORT_SYMBOL(phy_resume);
> 
> +int phy_loopback(struct phy_device *phydev, bool enable)
> +{
> + struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
> + int ret = 0;
> +
> + mutex_lock(&phydev->lock);
> +
> + if (enable && phydev->loopback_enabled) {
> + ret = -EBUSY;
> + goto out;
> + }
> +
> + if (!enable && !phydev->loopback_enabled) {
> + ret = -EINVAL;
> + goto out;
> + }
> +

if (enable == phydev->loopback_enabled)

> + if (phydev->drv && phydrv->set_loopback)
> + ret = phydrv->set_loopback(phydev, enable);
> + else
> + ret = -EOPNOTSUPP;
> +
> + if (ret)
> + goto out;
> +
> + phydev->loopback_enabled = enable;
> +
> +out:
> + mutex_unlock(&phydev->lock);
> + return ret;
> +}
> +EXPORT_SYMBOL(phy_loopback);
> +
>  /* Generic PHY support and helper functions */
> 
>  /**
> @@ -1628,6 +1661,23 @@ static int gen10g_resume(struct phy_device *phydev)
>   return 0;
>  }
> 
> +int genphy_loopback(struct phy_device *phydev, bool enable)
> +{
> + int value;
> +
> + value = phy_read(phydev, MII_BMCR);
> + if (value < 0)
> + return value;
> +
> + if (enable)
> + value |= BMCR_LOOPBACK;
> + else
> + value &= ~BMCR_LOOPBACK;
> +
> + return phy_write(phydev, MII_BMCR, value);
> +}
> +EXPORT_SYMBOL(genphy_loopback);
> +
>  static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
>  {
>   /* The default values for phydev->supported are provided by the PHY
> @@ -1874,6 +1924,7 @@ void phy_drivers_unregister(struct phy_driver *drv,
> int n)
>   .read_status= genphy_read_status,
>   .suspend= genphy_suspend,
>   .resume = genphy_resume,
> + .set_loopback   = genphy_loopback,
>  }, {
>   .phy_id = 0xffffffff,
>   .phy_id_mask= 0xffffffff,
> diff --git a/include/linux/phy.h b/include/linux/phy.h
> index e76e4ad..49c903dc 100644
> --- a/include/linux/phy.h
> +++ b/include/linux/phy.h
> @@ -364,6 +364,7 @@ struct phy_c45_device_ids {
>   * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch,
> etc.
>   * has_fixups: Set to true if this phy has fixups/quirks.
>   * suspended: Set to true if this phy has been suspended successfully.
> + * loopback_enabled: Set true if this phy has been loopbacked
> successfully.
>   * state: state of the PHY for management purposes
>   * dev_flags: Device-specific flags used by the PHY driver.
>   * link_timeout: The number of timer firings to wait before the
> @@ -400,6 +401,7 @@ struct phy_device {
>   bool is_pseudo_fixed_link;
>   bool has_fixups;
>   bool suspended;
> + bool loopback_enabled;
> 
>   enum phy_state state;
> 
> @@ -639,6 +641,7 @@ struct phy_driver {
>   int (*set_tunable)(struct phy_device *dev,
>   struct ethtool_tunable *tuna,
>   const void *data);
> + int

Re: [PATCH][V2] net/mlx4: fix spelling mistake: "enforcment" -> "enforcement"

2017-06-27 Thread Tariq Toukan




On 27/06/2017 1:36 PM, Colin King wrote:

From: Colin Ian King 

Trivial fix to spelling mistake in mlx4_dbg debug message

Signed-off-by: Colin Ian King 
---
  drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c 
b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c1af47e45d3f..9e4c142c7ecd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int 
port, int vf, int link_stat
  
  	if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))

mlx4_dbg(dev,
-"updating vf %d port %d no link state HW enforcment\n",
+"updating vf %d port %d no link state HW 
enforcement\n",
 vf, port);
return 0;
  }


Acked-by: Tariq Toukan 

Thanks Colin.

[PATCH] cavium: thunder: Remove duplicate "netdev->name" logging output

2017-06-27 Thread Joe Perches

Using netdev_<level>(netdev, "%s: ...", netdev->name) duplicates the
name in the output.  Remove those uses.

Miscellanea:

o Use the netif_<level> convenience macros at the same time

Signed-off-by: Joe Perches 
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   | 33 --
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |  8 ++
 2 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 573755b0a51b..49b80da51ba7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -227,15 +227,14 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
nic->speed = mbx.link_status.speed;
nic->mac_type = mbx.link_status.mac_type;
if (nic->link_up) {
-   netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
-   nic->netdev->name, nic->speed,
+   netdev_info(nic->netdev, "Link is Up %d Mbps %s 
duplex\n",
+   nic->speed,
nic->duplex == DUPLEX_FULL ?
-   "Full duplex" : "Half duplex");
+   "Full" : "Half");
netif_carrier_on(nic->netdev);
netif_tx_start_all_queues(nic->netdev);
} else {
-   netdev_info(nic->netdev, "%s: Link is Down\n",
-   nic->netdev->name);
+   netdev_info(nic->netdev, "Link is Down\n");
netif_carrier_off(nic->netdev);
netif_tx_stop_all_queues(nic->netdev);
}
@@ -721,8 +720,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
return;
 
if (netif_msg_pktdata(nic)) {
-   netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name,
-   skb, skb->len);
+   netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
   skb->data, skb->len, true);
}
@@ -854,10 +852,8 @@ static int nicvf_cq_intr_handler(struct net_device 
*netdev, u8 cq_idx,
netif_tx_wake_queue(txq);
nic = nic->pnicvf;
this_cpu_inc(nic->drv_stats->txq_wake);
-   if (netif_msg_tx_err(nic))
-   netdev_warn(netdev,
-   "%s: Transmit queue wakeup SQ%d\n",
-   netdev->name, txq_idx);
+   netif_warn(nic, tx_err, netdev,
+  "Transmit queue wakeup SQ%d\n", txq_idx);
}
}
 
@@ -928,9 +924,8 @@ static void nicvf_handle_qs_err(unsigned long data)
 
 static void nicvf_dump_intr_status(struct nicvf *nic)
 {
-   if (netif_msg_intr(nic))
-   netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
-   nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT));
+   netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
+  nicvf_reg_read(nic, NIC_VF_INT));
 }
 
 static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
@@ -1212,10 +1207,8 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, 
struct net_device *netdev)
netif_tx_wake_queue(txq);
} else {
this_cpu_inc(nic->drv_stats->txq_stop);
-   if (netif_msg_tx_err(nic))
-   netdev_warn(netdev,
-   "%s: Transmit ring full, stopping 
SQ%d\n",
-   netdev->name, qid);
+   netif_warn(nic, tx_err, netdev,
+  "Transmit ring full, stopping SQ%d\n", qid);
}
return NETDEV_TX_BUSY;
}
@@ -1600,9 +1593,7 @@ static void nicvf_tx_timeout(struct net_device *dev)
 {
struct nicvf *nic = netdev_priv(dev);
 
-   if (netif_msg_tx_err(nic))
-   netdev_warn(dev, "%s: Transmit timed out, resetting\n",
-   dev->name);
+   netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");
 
this_cpu_inc(nic->drv_stats->tx_timeout);
schedule_work(&nic->reset_task);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 2b181762ad49..d4496e9afcdf 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1811,11 +1811,9 @@ void nicvf_update_sq_stats(struct nicvf *nic, int

Re: [PATCH] net/mlx4: fix spelling mistake: "enforcment" -> "enforcement"

2017-06-27 Thread Colin Ian King

On 27/06/17 11:33, Tariq Toukan wrote:
> 
> 
> On 27/06/2017 1:02 PM, Colin King wrote:
>> From: Colin Ian King 
>>
>> Trivial fix to spelling mistake in mlx4_dbg debug message
>>
>> Signed-off-by: Colin Ian King 
>> ---
>>   drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c
>> b/drivers/net/ethernet/mellanox/mlx4/cmd.c
>> index c1af47e45d3f..9e4c142c7ecd 100644
>> --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
>> +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
>> @@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev,
>> int port, int vf, int link_stat
>> if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
>>   mlx4_dbg(dev,
>> - "updating vf %d port %d no link state HW enforcment\n",
>> + "updating vf %d port %d no link state HW enforecment\n",
> Hi Colin,
> You still have a typo. It's "enforcement".
>>vf, port);
>>   return 0;
>>   }
>>
Doh, stupid me. V2 fixes this.

[PATCH][V2] net/mlx4: fix spelling mistake: "enforcment" -> "enforcement"

2017-06-27 Thread Colin King

From: Colin Ian King 

Trivial fix to spelling mistake in mlx4_dbg debug message

Signed-off-by: Colin Ian King 
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c 
b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c1af47e45d3f..9e4c142c7ecd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int 
port, int vf, int link_stat
 
if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
mlx4_dbg(dev,
-"updating vf %d port %d no link state HW enforcment\n",
+"updating vf %d port %d no link state HW 
enforcement\n",
 vf, port);
return 0;
 }
-- 
2.11.0

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Andre Przywara

Hi,

On 27/06/17 11:23, Icenowy Zheng wrote:
> 
> 
> 于 2017年6月27日 GMT+08:00 下午6:15:58, Andre Przywara  写到:
>> Hi,
>>
>> On 27/06/17 10:41, Maxime Ripard wrote:
>>> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
 Hi,

 (CC:ing some people from that Rockchip dmwac series)

 On 27/06/17 09:21, Corentin Labbe wrote:
> On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
>> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
>>  wrote:
>>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
 On 31/05/17 08:18, Corentin Labbe wrote:
> The dwmac-sun8i is a heavy hacked version of stmmac hardware by
> allwinner.
> In fact the only common part is the descriptor management and
>> the first
> register function.

 Hi,

 I know I am a bit late with this, but while adapting the U-Boot
>> driver
 to the new binding I was wondering about the internal PHY
>> detection:


 So here you seem to deduce the usage of the internal PHY by the
>> PHY
 interface specified in the DT (MII = internal, RGMII =
>> external).
 I think I raised this question before, but isn't it perfectly
>> legal for
 a board to use MII with an external PHY even on those SoCs that
>> feature
 an internal PHY?
 On the first glance that does not make too much sense, but apart
>> from
 not being the correct binding to describe all of the SoCs
>> features I see
 two scenarios:
 1) A board vendor might choose to not use the internal PHY
>> because it
 has bugs, lacks features (configurability) or has other issues.
>> For
 instance I have heard reports that the internal PHY makes the
>> SoC go
 rather hot, possibly limiting the CPU frequency. By using an
>> external
 MII PHY (which are still cheaper than RGMII PHYs) this can be
>> avoided.
 2) A PHY does not necessarily need to be directly connected to
 magnetics. Indeed quite some boards use (RG)MII to connect to a
>> switch
 IC or some other network circuitry, for instance fibre
>> connectors.

 So I was wondering if we would need an explicit:
   allwinner,use-internal-phy;
 boolean DT property to signal the usage of the internal PHY?
 Alternatively we could go with the negative version:
   allwinner,disable-internal-phy;

 Or what about introducing a new "allwinner,internal-mii-phy"
>> compatible
 string for the *PHY* node and use that?

 I just want to avoid that we introduce a binding that causes us
 headaches later. I think we can still fix this with a followup
>> patch
 before the driver and its binding hit a release kernel.

 Cheers,
 Andre.

>>>
>>> I just see some patch, where "phy-mode = internal" is valid.
>>> I will try to find a way to use it
>>
>> Can you provide a link?
>
> https://lkml.org/lkml/2017/6/23/479
>
>>
>> I'm not a fan of using phy-mode for this. There's no guarantee
>> what
>> mode the internal PHY uses. That's what phy-mode is for.

 I can understand Chen-Yu's concerns, but ...

> For each soc the internal PHY mode is know and setted in
>> emac_variant/internal_phy
> So its not a problem.

 that is true as well, at least for now.

 So while I agree that having a separate property to indicate the
>> usage
 of the internal PHY would be nice, I am bit tempted to use this
>> easier
 approach and piggy back on the existing phy-mode property.
>>>
>>> We're trying to fix an issue that works for now too.
>>>
>>> If we want to consider future weird cases, then we must consider all
>>> of them. And the phy mode changing is definitely not really far
>>> fetched.
>>>
>>> I agree with Chen-Yu, and I really feel like the compatible solution
>>> you suggested would cover both your concerns, and ours.
>>
>> So something like this?
>>  emac: emac@1c3 {
>>  compatible = "allwinner,sun8i-h3-emac";
>>  ...
>>  phy-mode = "mii";
>>  phy-handle = <&int_mii_phy>;
>>  ...
>>
>>  mdio: mdio {
>>#address-cells = <1>;
>>#size-cells = <0>;
>>int_mii_phy: ethernet-phy@1 {
>>compatible = "allwinner,sun8i-h3-ephy";
>>syscon = <>;
> 
> The MAC still needs to set some bits of syscon register.

Yes, the syscon property needs also to be in the MAC node, that was
meant to be somewhere in the second "..." ;-)

But now since Chen-Yu mentioned that we need to set up the PHY *first*
to make it actually discoverable via MDIO, I wonder if we could change
this to:
1) have the DT as described here
2) Let the dwmac-sun8i

Re: [PATCH] net/mlx4: fix spelling mistake: "enforcment" -> "enforcement"

2017-06-27 Thread Tariq Toukan




On 27/06/2017 1:02 PM, Colin King wrote:

From: Colin Ian King 

Trivial fix to spelling mistake in mlx4_dbg debug message

Signed-off-by: Colin Ian King 
---
  drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c 
b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c1af47e45d3f..9e4c142c7ecd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int 
port, int vf, int link_stat
  
  	if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))

mlx4_dbg(dev,
-"updating vf %d port %d no link state HW enforcment\n",
+"updating vf %d port %d no link state HW 
enforecment\n",

Hi Colin,
You still have a typo. It's "enforcement".

 vf, port);
return 0;
  }

[PATCH NET V6 1/2] net: phy: Add phy loopback support in net phy framework

2017-06-27 Thread Lin Yun Sheng

This patch adds set_loopback to phy_driver, which is used by the MAC
driver to enable or disable PHY loopback. It also adds a generic
genphy_loopback function, which uses the BMCR loopback bit to enable
or disable loopback on a PHY.

Signed-off-by: Lin Yun Sheng 
---
 drivers/net/phy/marvell.c|  1 +
 drivers/net/phy/phy_device.c | 51 
 include/linux/phy.h  |  5 +
 3 files changed, 57 insertions(+)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 57297ba..01a1586 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2094,6 +2094,7 @@ static int m88e1510_probe(struct phy_device *phydev)
.get_sset_count = marvell_get_sset_count,
.get_strings = marvell_get_strings,
.get_stats = marvell_get_stats,
+   .set_loopback = genphy_loopback,
},
{
.phy_id = MARVELL_PHY_ID_88E1540,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 1219eea..1e08d62 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1123,6 +1123,39 @@ int phy_resume(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_resume);
 
+int phy_loopback(struct phy_device *phydev, bool enable)
+{
+   struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
+   int ret = 0;
+
+   mutex_lock(&phydev->lock);
+
+   if (enable && phydev->loopback_enabled) {
+   ret = -EBUSY;
+   goto out;
+   }
+
+   if (!enable && !phydev->loopback_enabled) {
+   ret = -EINVAL;
+   goto out;
+   }
+
+   if (phydev->drv && phydrv->set_loopback)
+   ret = phydrv->set_loopback(phydev, enable);
+   else
+   ret = -EOPNOTSUPP;
+
+   if (ret)
+   goto out;
+
+   phydev->loopback_enabled = enable;
+
+out:
+   mutex_unlock(&phydev->lock);
+   return ret;
+}
+EXPORT_SYMBOL(phy_loopback);
+
 /* Generic PHY support and helper functions */
 
 /**
@@ -1628,6 +1661,23 @@ static int gen10g_resume(struct phy_device *phydev)
return 0;
 }
 
+int genphy_loopback(struct phy_device *phydev, bool enable)
+{
+   int value;
+
+   value = phy_read(phydev, MII_BMCR);
+   if (value < 0)
+   return value;
+
+   if (enable)
+   value |= BMCR_LOOPBACK;
+   else
+   value &= ~BMCR_LOOPBACK;
+
+   return phy_write(phydev, MII_BMCR, value);
+}
+EXPORT_SYMBOL(genphy_loopback);
+
 static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
 {
/* The default values for phydev->supported are provided by the PHY
@@ -1874,6 +1924,7 @@ void phy_drivers_unregister(struct phy_driver *drv, int n)
.read_status= genphy_read_status,
.suspend= genphy_suspend,
.resume = genphy_resume,
+   .set_loopback   = genphy_loopback,
 }, {
.phy_id = 0x,
.phy_id_mask= 0x,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e76e4ad..49c903dc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -364,6 +364,7 @@ struct phy_c45_device_ids {
  * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc.
  * has_fixups: Set to true if this phy has fixups/quirks.
  * suspended: Set to true if this phy has been suspended successfully.
+ * loopback_enabled: Set true if this phy has been loopbacked successfully.
  * state: state of the PHY for management purposes
  * dev_flags: Device-specific flags used by the PHY driver.
  * link_timeout: The number of timer firings to wait before the
@@ -400,6 +401,7 @@ struct phy_device {
bool is_pseudo_fixed_link;
bool has_fixups;
bool suspended;
+   bool loopback_enabled;
 
enum phy_state state;
 
@@ -639,6 +641,7 @@ struct phy_driver {
int (*set_tunable)(struct phy_device *dev,
struct ethtool_tunable *tuna,
const void *data);
+   int (*set_loopback)(struct phy_device *dev, bool enable);
 };
 #define to_phy_driver(d) container_of(to_mdio_common_driver(d),
\
  struct phy_driver, mdiodrv)
@@ -774,6 +777,7 @@ static inline void phy_device_free(struct phy_device 
*phydev) { }
 int phy_init_hw(struct phy_device *phydev);
 int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
+int phy_loopback(struct phy_device *phydev, bool enable);
 struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
  phy_interface_t interface);
 struct phy_device *phy_find_first(struct mii_bus *bus);
@@ -825,6 +829,7 @@ void phy_attached_print(struct phy_device *phydev, const 
char *fmt, ...)
 int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device

[PATCH NET V6 2/2] net: hns: Use phy_driver to setup Phy loopback

2017-06-27 Thread Lin Yun Sheng

Use the set_loopback function in phy_driver to set up PHY loopback
when doing the ethtool self-test.

Signed-off-by: Lin Yun Sheng 
---
 drivers/net/ethernet/hisilicon/hns/hnae.h|   1 +
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 102 +++
 2 files changed, 32 insertions(+), 71 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h 
b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 04211ac..7ba653a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -360,6 +360,7 @@ enum hnae_loop {
MAC_INTERNALLOOP_MAC = 0,
MAC_INTERNALLOOP_SERDES,
MAC_INTERNALLOOP_PHY,
+   MAC_LOOP_PHY_NONE,
MAC_LOOP_NONE,
 };
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c 
b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index e95795b..7fdd26e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -259,67 +259,24 @@ static int hns_nic_set_link_ksettings(struct net_device 
*net_dev,
 
 static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
 {
-#define COPPER_CONTROL_REG 0
-#define PHY_POWER_DOWN BIT(11)
-#define PHY_LOOP_BACK BIT(14)
-   u16 val = 0;
-
-   if (phy_dev->is_c45) /* c45 branch adding for XGE PHY */
-   return -ENOTSUPP;
+   int err;
 
if (en) {
-   /* speed : 1000M */
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 2);
-   phy_write(phy_dev, 21, 0x1046);
-
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-   /* Force Master */
-   phy_write(phy_dev, 9, 0x1F00);
-
-   /* Soft-reset */
-   phy_write(phy_dev, 0, 0x9140);
-   /* If autoneg disabled,two soft-reset operations */
-   phy_write(phy_dev, 0, 0x9140);
-
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
-
-   /* Default is 0x0400 */
-   phy_write(phy_dev, 1, 0x418);
-
-   /* Force 1000M Link, Default is 0x0200 */
-   phy_write(phy_dev, 7, 0x20C);
-
-   /* Powerup Fiber */
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
-   val = phy_read(phy_dev, COPPER_CONTROL_REG);
-   val &= ~PHY_POWER_DOWN;
-   phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
-   /* Enable Phy Loopback */
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-   val = phy_read(phy_dev, COPPER_CONTROL_REG);
-   val |= PHY_LOOP_BACK;
-   val &= ~PHY_POWER_DOWN;
-   phy_write(phy_dev, COPPER_CONTROL_REG, val);
+   err = phy_resume(phy_dev);
+   if (err)
+   goto out;
+
+   err = phy_loopback(phy_dev, true);
} else {
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
-   phy_write(phy_dev, 1, 0x400);
-   phy_write(phy_dev, 7, 0x200);
-
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
-   val = phy_read(phy_dev, COPPER_CONTROL_REG);
-   val |= PHY_POWER_DOWN;
-   phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
-   phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-   phy_write(phy_dev, 9, 0xF00);
-
-   val = phy_read(phy_dev, COPPER_CONTROL_REG);
-   val &= ~PHY_LOOP_BACK;
-   val |= PHY_POWER_DOWN;
-   phy_write(phy_dev, COPPER_CONTROL_REG, val);
+   err = phy_loopback(phy_dev, false);
+   if (err)
+   goto out;
+
+   err = phy_suspend(phy_dev);
}
-   return 0;
+
+out:
+   return err;
 }
 
 static int __lb_setup(struct net_device *ndev,
@@ -332,10 +289,9 @@ static int __lb_setup(struct net_device *ndev,
 
switch (loop) {
case MAC_INTERNALLOOP_PHY:
-   if ((phy_dev) && (!phy_dev->is_c45)) {
-   ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
-   ret |= h->dev->ops->set_loopback(h, loop, 0x1);
-   }
+   ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
+   if (!ret)
+   ret = h->dev->ops->set_loopback(h, loop, 0x1);
break;
case MAC_INTERNALLOOP_MAC:
if ((h->dev->ops->set_loopback) &&
@@ -346,17 +302,17 @@ static int __lb_setup(struct net_device *ndev,
if (h->dev->ops->set_loopback)
ret = h->dev->ops->set_loopback(h, loop, 0x1);
break;
+   case MAC_LOOP_PHY_NONE:
+   ret = hns_nic_config_phy_loopback(phy_dev, 0x0);
case MAC_LOOP_NONE:
-   if ((phy_dev) && (!phy_dev->is_c45))
-   ret |= hns_nic_config_phy_loopback(phy_dev, 0x0);
-
-   if (h->dev->ops->set_loopback) {
+   if (!ret

[PATCH NET V6 0/2] Add loopback support in phy_driver and hns ethtool fix

2017-06-27 Thread Lin Yun Sheng

This patch set adds set_loopback to phy_driver and uses it to set up loopback
when doing the ethtool PHY self-test.

Patch V6:
Fix Or'ing error code in __lb_setup.

Patch V5:
Removing non loopback related code change.

Patch V4:
1. Remove c45 checking
2. Add -ENOTSUPP when function pointer is null,
   take mutex in phy_loopback.

Patch V3:
Calling phy_loopback enable and disable in pair in hns mac driver.

Patch V2:
1. Add phy_loopback in phy_device.c.
2. Do error checking and do the read and write once in
   genphy_loopback.
3. Remove gen10g_loopback in phy_device.c.

Patch V1:
Initial Submit

Lin Yun Sheng (2):
  net: phy: Add phy loopback support in net phy framework
  net: hns: Use phy_driver to setup Phy loopback

 drivers/net/ethernet/hisilicon/hns/hnae.h|   1 +
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 102 +++
 drivers/net/phy/marvell.c|   1 +
 drivers/net/phy/phy_device.c |  51 
 include/linux/phy.h  |   5 ++
 5 files changed, 89 insertions(+), 71 deletions(-)

-- 
1.9.1

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Icenowy Zheng



于 2017年6月27日 GMT+08:00 下午6:15:58, Andre Przywara  写到:
>Hi,
>
>On 27/06/17 10:41, Maxime Ripard wrote:
>> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
>>> Hi,
>>>
>>> (CC:ing some people from that Rockchip dmwac series)
>>>
>>> On 27/06/17 09:21, Corentin Labbe wrote:
 On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
>  wrote:
>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>>> On 31/05/17 08:18, Corentin Labbe wrote:
 The dwmac-sun8i is a heavy hacked version of stmmac hardware by
 allwinner.
 In fact the only common part is the descriptor management and
>the first
 register function.
>>>
>>> Hi,
>>>
>>> I know I am a bit late with this, but while adapting the U-Boot
>driver
>>> to the new binding I was wondering about the internal PHY
>detection:
>>>
>>>
>>> So here you seem to deduce the usage of the internal PHY by the
>PHY
>>> interface specified in the DT (MII = internal, RGMII =
>external).
>>> I think I raised this question before, but isn't it perfectly
>legal for
>>> a board to use MII with an external PHY even on those SoCs that
>feature
>>> an internal PHY?
>>> On the first glance that does not make too much sense, but apart
>from
>>> not being the correct binding to describe all of the SoCs
>features I see
>>> two scenarios:
>>> 1) A board vendor might choose to not use the internal PHY
>because it
>>> has bugs, lacks features (configurability) or has other issues.
>For
>>> instance I have heard reports that the internal PHY makes the
>SoC go
>>> rather hot, possibly limiting the CPU frequency. By using an
>external
>>> MII PHY (which are still cheaper than RGMII PHYs) this can be
>avoided.
>>> 2) A PHY does not necessarily need to be directly connected to
>>> magnetics. Indeed quite some boards use (RG)MII to connect to a
>switch
>>> IC or some other network circuitry, for instance fibre
>connectors.
>>>
>>> So I was wondering if we would need an explicit:
>>>   allwinner,use-internal-phy;
>>> boolean DT property to signal the usage of the internal PHY?
>>> Alternatively we could go with the negative version:
>>>   allwinner,disable-internal-phy;
>>>
>>> Or what about introducing a new "allwinner,internal-mii-phy"
>compatible
>>> string for the *PHY* node and use that?
>>>
>>> I just want to avoid that we introduce a binding that causes us
>>> headaches later. I think we can still fix this with a followup
>patch
>>> before the driver and its binding hit a release kernel.
>>>
>>> Cheers,
>>> Andre.
>>>
>>
>> I just see some patch, where "phy-mode = internal" is valid.
>> I will try to find a way to use it
>
> Can you provide a link?

 https://lkml.org/lkml/2017/6/23/479

>
> I'm not a fan of using phy-mode for this. There's no guarantee
>what
> mode the internal PHY uses. That's what phy-mode is for.
>>>
>>> I can understand Chen-Yu's concerns, but ...
>>>
 For each soc the internal PHY mode is know and setted in
>emac_variant/internal_phy
 So its not a problem.
>>>
>>> that is true as well, at least for now.
>>>
>>> So while I agree that having a separate property to indicate the
>usage
>>> of the internal PHY would be nice, I am bit tempted to use this
>easier
>>> approach and piggy back on the existing phy-mode property.
>> 
>> We're trying to fix an issue that works for now too.
>> 
>> If we want to consider future weird cases, then we must consider all
>> of them. And the phy mode changing is definitely not really far
>> fetched.
>> 
>> I agree with Chen-Yu, and I really feel like the compatible solution
>> you suggested would cover both your concerns, and ours.
>
>So something like this?
>   emac: emac@1c3 {
>   compatible = "allwinner,sun8i-h3-emac";
>   ...
>   phy-mode = "mii";
>   phy-handle = <&int_mii_phy>;
>   ...
>
>   mdio: mdio {
>#address-cells = <1>;
>#size-cells = <0>;
>int_mii_phy: ethernet-phy@1 {
>compatible = "allwinner,sun8i-h3-ephy";
>syscon = <>;

The MAC still needs to set some bits of syscon register.

>reg = <1>;
>clocks = < CLK_BUS_EPHY>;
>resets = < RST_BUS_EPHY>;
>};
>};
>};
>
>And then move the internal-PHY setup code into a separate PHY driver?
>
>That looks like the architecturally best solution to me, but is
>probably
>also a bit involved since it would require a separate PHY driver.
>Or can we make it simpler, but still use this binding?
>
>Cheers,
>Andre.

Re: [PATCH net-next 1/3] net: ethtool: add support for forward error correction modes

2017-06-27 Thread Jakub Kicinski

On Sat, 24 Jun 2017 12:19:43 -0700, Roopa Prabhu wrote:
> Encoding: Types of encoding
> Off:  Turning off any encoding
> RS :  enforcing RS-FEC encoding on supported speeds
> BaseR  :  enforcing Base R encoding on supported speeds
> Auto   :  IEEE defaults for the speed/medium combination

Just to be sure - does auto mean autonegotiate as defined by IEEE or
some presets?  IIUC there is a notion of different length cables
defaulting to different strength of FEC in 25GE?

Thank you for doing this work!

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Chen-Yu Tsai

On Tue, Jun 27, 2017 at 6:15 PM, Andre Przywara  wrote:
> Hi,
>
> On 27/06/17 10:41, Maxime Ripard wrote:
>> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
>>> Hi,
>>>
>>> (CC:ing some people from that Rockchip dmwac series)
>>>
>>> On 27/06/17 09:21, Corentin Labbe wrote:
 On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
>  wrote:
>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>>> On 31/05/17 08:18, Corentin Labbe wrote:
 The dwmac-sun8i is a heavy hacked version of stmmac hardware by
 allwinner.
 In fact the only common part is the descriptor management and the first
 register function.
>>>
>>> Hi,
>>>
>>> I know I am a bit late with this, but while adapting the U-Boot driver
>>> to the new binding I was wondering about the internal PHY detection:
>>>
>>>
>>> So here you seem to deduce the usage of the internal PHY by the PHY
>>> interface specified in the DT (MII = internal, RGMII = external).
>>> I think I raised this question before, but isn't it perfectly legal for
>>> a board to use MII with an external PHY even on those SoCs that feature
>>> an internal PHY?
>>> On the first glance that does not make too much sense, but apart from
>>> not being the correct binding to describe all of the SoCs features I see
>>> two scenarios:
>>> 1) A board vendor might choose to not use the internal PHY because it
>>> has bugs, lacks features (configurability) or has other issues. For
>>> instance I have heard reports that the internal PHY makes the SoC go
>>> rather hot, possibly limiting the CPU frequency. By using an external
>>> MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
>>> 2) A PHY does not necessarily need to be directly connected to
>>> magnetics. Indeed quite some boards use (RG)MII to connect to a switch
>>> IC or some other network circuitry, for instance fibre connectors.
>>>
>>> So I was wondering if we would need an explicit:
>>>   allwinner,use-internal-phy;
>>> boolean DT property to signal the usage of the internal PHY?
>>> Alternatively we could go with the negative version:
>>>   allwinner,disable-internal-phy;
>>>
>>> Or what about introducing a new "allwinner,internal-mii-phy" compatible
>>> string for the *PHY* node and use that?
>>>
>>> I just want to avoid that we introduce a binding that causes us
>>> headaches later. I think we can still fix this with a followup patch
>>> before the driver and its binding hit a release kernel.
>>>
>>> Cheers,
>>> Andre.
>>>
>>
>> I just see some patch, where "phy-mode = internal" is valid.
>> I will try to find a way to use it
>
> Can you provide a link?

 https://lkml.org/lkml/2017/6/23/479

>
> I'm not a fan of using phy-mode for this. There's no guarantee what
> mode the internal PHY uses. That's what phy-mode is for.
>>>
>>> I can understand Chen-Yu's concerns, but ...
>>>
 For each soc the internal PHY mode is know and setted in 
 emac_variant/internal_phy
 So its not a problem.
>>>
>>> that is true as well, at least for now.
>>>
>>> So while I agree that having a separate property to indicate the usage
>>> of the internal PHY would be nice, I am bit tempted to use this easier
>>> approach and piggy back on the existing phy-mode property.
>>
>> We're trying to fix an issue that works for now too.
>>
>> If we want to consider future weird cases, then we must consider all
>> of them. And the phy mode changing is definitely not really far
>> fetched.
>>
>> I agree with Chen-Yu, and I really feel like the compatible solution
>> you suggested would cover both your concerns, and ours.
>
> So something like this?
> emac: emac@1c3 {
> compatible = "allwinner,sun8i-h3-emac";
> ...
> phy-mode = "mii";
> phy-handle = <&int_mii_phy>;
> ...
>
> mdio: mdio {
> #address-cells = <1>;
> #size-cells = <0>;
> int_mii_phy: ethernet-phy@1 {
> compatible = "allwinner,sun8i-h3-ephy";
> syscon = <>;
> reg = <1>;
> clocks = < CLK_BUS_EPHY>;
> resets = < RST_BUS_EPHY>;
> };
> };
> };
>
> And then move the internal-PHY setup code into a separate PHY driver?
>
> That looks like the architecturally best solution to me, but is probably
> also a bit involved since it would require a separate PHY driver.
> Or can we make it simpler, but still use this binding?

This was my initial approach prior to handing it off to Corentin.

The MDIO bus is discoverable, so in the kernel

Re: [linux-sunxi] Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Chen-Yu Tsai

On Tue, Jun 27, 2017 at 6:17 PM, Icenowy Zheng  wrote:
>
>
> 于 2017年6月27日 GMT+08:00 下午6:11:47, Chen-Yu Tsai  写到:
>>On Tue, Jun 27, 2017 at 5:41 PM, Maxime Ripard
>> wrote:
>>> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
 Hi,

 (CC:ing some people from that Rockchip dmwac series)

 On 27/06/17 09:21, Corentin Labbe wrote:
 > On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
 >> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
 >>  wrote:
 >>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
  On 31/05/17 08:18, Corentin Labbe wrote:
 > The dwmac-sun8i is a heavy hacked version of stmmac hardware
>>by
 > allwinner.
 > In fact the only common part is the descriptor management and
>>the first
 > register function.
 
  Hi,
 
  I know I am a bit late with this, but while adapting the U-Boot
>>driver
  to the new binding I was wondering about the internal PHY
>>detection:
 
 
  So here you seem to deduce the usage of the internal PHY by the
>>PHY
  interface specified in the DT (MII = internal, RGMII =
>>external).
  I think I raised this question before, but isn't it perfectly
>>legal for
  a board to use MII with an external PHY even on those SoCs that
>>feature
  an internal PHY?
  On the first glance that does not make too much sense, but
>>apart from
  not being the correct binding to describe all of the SoCs
>>features I see
  two scenarios:
  1) A board vendor might choose to not use the internal PHY
>>because it
  has bugs, lacks features (configurability) or has other issues.
>>For
  instance I have heard reports that the internal PHY makes the
>>SoC go
  rather hot, possibly limiting the CPU frequency. By using an
>>external
  MII PHY (which are still cheaper than RGMII PHYs) this can be
>>avoided.
  2) A PHY does not necessarily need to be directly connected to
  magnetics. Indeed quite some boards use (RG)MII to connect to a
>>switch
  IC or some other network circuitry, for instance fibre
>>connectors.
 
  So I was wondering if we would need an explicit:
    allwinner,use-internal-phy;
  boolean DT property to signal the usage of the internal PHY?
  Alternatively we could go with the negative version:
    allwinner,disable-internal-phy;
 
  Or what about introducing a new "allwinner,internal-mii-phy"
>>compatible
  string for the *PHY* node and use that?
 
  I just want to avoid that we introduce a binding that causes us
  headaches later. I think we can still fix this with a followup
>>patch
  before the driver and its binding hit a release kernel.
 
  Cheers,
  Andre.
 
 >>>
 >>> I just see some patch, where "phy-mode = internal" is valid.
 >>> I will try to find a way to use it
 >>
 >> Can you provide a link?
 >
 > https://lkml.org/lkml/2017/6/23/479
 >
 >>
 >> I'm not a fan of using phy-mode for this. There's no guarantee
>>what
 >> mode the internal PHY uses. That's what phy-mode is for.

 I can understand Chen-Yu's concerns, but ...

 > For each soc the internal PHY mode is know and setted in
>>emac_variant/internal_phy
 > So its not a problem.

 that is true as well, at least for now.

 So while I agree that having a separate property to indicate the
>>usage
 of the internal PHY would be nice, I am bit tempted to use this
>>easier
 approach and piggy back on the existing phy-mode property.
>>>
>>> We're trying to fix an issue that works for now too.
>>>
>>> If we want to consider future weird cases, then we must consider all
>>> of them. And the phy mode changing is definitely not really far
>>> fetched.
>>
>>I guess the issue is whether it's likely that the vendor puts 2
>>internal
>>PHYs in one SoC, and they use different modes and can be switched
>>around.
>>Otherwise it's fixed for a given SoC, and we can just handle that with
>>the per-SoC GMAC compatible.
>>
>>Maybe Florian could tell us if this was one of the intended use cases
>>for the "internal" phy mode.
>>
>>As for Rockchip, AFAIK they have 2 MACs, one is connected to the
>>internal
>>PHY, while the other is connected to the external interface, and there
>>is
>>no muxing involved, unlike Allwinner's solution.
>>
>>> I agree with Chen-Yu, and I really feel like the compatible solution
>>> you suggested would cover both your concerns, and ours.
>>
>>If using a PHY compatible is the solution, we could just use the
>>"ethernet-phy-id." style one, and put in the bogus ID that
>>Allwinner used.
>>
>>Care must

Re: [linux-sunxi] Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Icenowy Zheng



于 2017年6月27日 GMT+08:00 下午6:11:47, Chen-Yu Tsai  写到:
>On Tue, Jun 27, 2017 at 5:41 PM, Maxime Ripard
> wrote:
>> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
>>> Hi,
>>>
>>> (CC:ing some people from that Rockchip dmwac series)
>>>
>>> On 27/06/17 09:21, Corentin Labbe wrote:
>>> > On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
>>> >> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
>>> >>  wrote:
>>> >>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>>>  On 31/05/17 08:18, Corentin Labbe wrote:
>>> > The dwmac-sun8i is a heavy hacked version of stmmac hardware
>by
>>> > allwinner.
>>> > In fact the only common part is the descriptor management and
>the first
>>> > register function.
>>> 
>>>  Hi,
>>> 
>>>  I know I am a bit late with this, but while adapting the U-Boot
>driver
>>>  to the new binding I was wondering about the internal PHY
>detection:
>>> 
>>> 
>>>  So here you seem to deduce the usage of the internal PHY by the
>PHY
>>>  interface specified in the DT (MII = internal, RGMII =
>external).
>>>  I think I raised this question before, but isn't it perfectly
>legal for
>>>  a board to use MII with an external PHY even on those SoCs that
>feature
>>>  an internal PHY?
>>>  On the first glance that does not make too much sense, but
>apart from
>>>  not being the correct binding to describe all of the SoCs
>features I see
>>>  two scenarios:
>>>  1) A board vendor might choose to not use the internal PHY
>because it
>>>  has bugs, lacks features (configurability) or has other issues.
>For
>>>  instance I have heard reports that the internal PHY makes the
>SoC go
>>>  rather hot, possibly limiting the CPU frequency. By using an
>external
>>>  MII PHY (which are still cheaper than RGMII PHYs) this can be
>avoided.
>>>  2) A PHY does not necessarily need to be directly connected to
>>>  magnetics. Indeed quite some boards use (RG)MII to connect to a
>switch
>>>  IC or some other network circuitry, for instance fibre
>connectors.
>>> 
>>>  So I was wondering if we would need an explicit:
>>>    allwinner,use-internal-phy;
>>>  boolean DT property to signal the usage of the internal PHY?
>>>  Alternatively we could go with the negative version:
>>>    allwinner,disable-internal-phy;
>>> 
>>>  Or what about introducing a new "allwinner,internal-mii-phy"
>compatible
>>>  string for the *PHY* node and use that?
>>> 
>>>  I just want to avoid that we introduce a binding that causes us
>>>  headaches later. I think we can still fix this with a followup
>patch
>>>  before the driver and its binding hit a release kernel.
>>> 
>>>  Cheers,
>>>  Andre.
>>> 
>>> >>>
>>> >>> I just see some patch, where "phy-mode = internal" is valid.
>>> >>> I will try to find a way to use it
>>> >>
>>> >> Can you provide a link?
>>> >
>>> > https://lkml.org/lkml/2017/6/23/479
>>> >
>>> >>
>>> >> I'm not a fan of using phy-mode for this. There's no guarantee
>what
>>> >> mode the internal PHY uses. That's what phy-mode is for.
>>>
>>> I can understand Chen-Yu's concerns, but ...
>>>
>>> > For each soc the internal PHY mode is know and setted in
>emac_variant/internal_phy
>>> > So its not a problem.
>>>
>>> that is true as well, at least for now.
>>>
>>> So while I agree that having a separate property to indicate the
>usage
>>> of the internal PHY would be nice, I am bit tempted to use this
>easier
>>> approach and piggy back on the existing phy-mode property.
>>
>> We're trying to fix an issue that works for now too.
>>
>> If we want to consider future weird cases, then we must consider all
>> of them. And the phy mode changing is definitely not really far
>> fetched.
>
>I guess the issue is whether it's likely that the vendor puts 2
>internal
>PHYs in one SoC, and they use different modes and can be switched
>around.
>Otherwise it's fixed for a given SoC, and we can just handle that with
>the per-SoC GMAC compatible.
>
>Maybe Florian could tell us if this was one of the intended use cases
>for the "internal" phy mode.
>
>As for Rockchip, AFAIK they have 2 MACs, one is connected to the
>internal
>PHY, while the other is connected to the external interface, and there
>is
>no muxing involved, unlike Allwinner's solution.
>
>> I agree with Chen-Yu, and I really feel like the compatible solution
>> you suggested would cover both your concerns, and ours.
>
>If using a PHY compatible is the solution, we could just use the
>"ethernet-phy-id." style one, and put in the bogus ID that
>Allwinner used.
>
>Care must be taken to put this at the board level for boards using
>the internal PHY, or we'd have to delete or override the property
>in all other boards.
>
>Ideally I think the internal PHY device node should _not_ be

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Andre Przywara

Hi,

On 27/06/17 10:41, Maxime Ripard wrote:
> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
>> Hi,
>>
>> (CC:ing some people from that Rockchip dmwac series)
>>
>> On 27/06/17 09:21, Corentin Labbe wrote:
>>> On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
 On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
  wrote:
> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>> On 31/05/17 08:18, Corentin Labbe wrote:
>>> The dwmac-sun8i is a heavy hacked version of stmmac hardware by
>>> allwinner.
>>> In fact the only common part is the descriptor management and the first
>>> register function.
>>
>> Hi,
>>
>> I know I am a bit late with this, but while adapting the U-Boot driver
>> to the new binding I was wondering about the internal PHY detection:
>>
>>
>> So here you seem to deduce the usage of the internal PHY by the PHY
>> interface specified in the DT (MII = internal, RGMII = external).
>> I think I raised this question before, but isn't it perfectly legal for
>> a board to use MII with an external PHY even on those SoCs that feature
>> an internal PHY?
>> On the first glance that does not make too much sense, but apart from
>> not being the correct binding to describe all of the SoCs features I see
>> two scenarios:
>> 1) A board vendor might choose to not use the internal PHY because it
>> has bugs, lacks features (configurability) or has other issues. For
>> instance I have heard reports that the internal PHY makes the SoC go
>> rather hot, possibly limiting the CPU frequency. By using an external
>> MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
>> 2) A PHY does not necessarily need to be directly connected to
>> magnetics. Indeed quite some boards use (RG)MII to connect to a switch
>> IC or some other network circuitry, for instance fibre connectors.
>>
>> So I was wondering if we would need an explicit:
>>   allwinner,use-internal-phy;
>> boolean DT property to signal the usage of the internal PHY?
>> Alternatively we could go with the negative version:
>>   allwinner,disable-internal-phy;
>>
>> Or what about introducing a new "allwinner,internal-mii-phy" compatible
>> string for the *PHY* node and use that?
>>
>> I just want to avoid that we introduce a binding that causes us
>> headaches later. I think we can still fix this with a followup patch
>> before the driver and its binding hit a release kernel.
>>
>> Cheers,
>> Andre.
>>
>
> I just see some patch, where "phy-mode = internal" is valid.
> I will try to find a way to use it

 Can you provide a link?
>>>
>>> https://lkml.org/lkml/2017/6/23/479
>>>

 I'm not a fan of using phy-mode for this. There's no guarantee what
 mode the internal PHY uses. That's what phy-mode is for.
>>
>> I can understand Chen-Yu's concerns, but ...
>>
>>> For each soc the internal PHY mode is know and setted in 
>>> emac_variant/internal_phy
>>> So its not a problem.
>>
>> that is true as well, at least for now.
>>
>> So while I agree that having a separate property to indicate the usage
>> of the internal PHY would be nice, I am bit tempted to use this easier
>> approach and piggy back on the existing phy-mode property.
> 
> We're trying to fix an issue that works for now too.
> 
> If we want to consider future weird cases, then we must consider all
> of them. And the phy mode changing is definitely not really far
> fetched.
> 
> I agree with Chen-Yu, and I really feel like the compatible solution
> you suggested would cover both your concerns, and ours.

So something like this?
emac: emac@1c3 {
compatible = "allwinner,sun8i-h3-emac";
...
phy-mode = "mii";
phy-handle = <&int_mii_phy>;
...

mdio: mdio {
#address-cells = <1>;
#size-cells = <0>;
int_mii_phy: ethernet-phy@1 {
compatible = "allwinner,sun8i-h3-ephy";
syscon = <&syscon>;
reg = <1>;
clocks = <&ccu CLK_BUS_EPHY>;
resets = <&ccu RST_BUS_EPHY>;
};
};
};

And then move the internal-PHY setup code into a separate PHY driver?

That looks like the architecturally best solution to me, but is probably
also a bit involved since it would require a separate PHY driver.
Or can we make it simpler, but still use this binding?

Cheers,
Andre.

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Chen-Yu Tsai

On Tue, Jun 27, 2017 at 5:41 PM, Maxime Ripard
 wrote:
> On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
>> Hi,
>>
>> (CC:ing some people from that Rockchip dmwac series)
>>
>> On 27/06/17 09:21, Corentin Labbe wrote:
>> > On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
>> >> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
>> >>  wrote:
>> >>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>>  On 31/05/17 08:18, Corentin Labbe wrote:
>> > The dwmac-sun8i is a heavy hacked version of stmmac hardware by
>> > allwinner.
>> > In fact the only common part is the descriptor management and the first
>> > register function.
>> 
>>  Hi,
>> 
>>  I know I am a bit late with this, but while adapting the U-Boot driver
>>  to the new binding I was wondering about the internal PHY detection:
>> 
>> 
>>  So here you seem to deduce the usage of the internal PHY by the PHY
>>  interface specified in the DT (MII = internal, RGMII = external).
>>  I think I raised this question before, but isn't it perfectly legal for
>>  a board to use MII with an external PHY even on those SoCs that feature
>>  an internal PHY?
>>  On the first glance that does not make too much sense, but apart from
>>  not being the correct binding to describe all of the SoCs features I see
>>  two scenarios:
>>  1) A board vendor might choose to not use the internal PHY because it
>>  has bugs, lacks features (configurability) or has other issues. For
>>  instance I have heard reports that the internal PHY makes the SoC go
>>  rather hot, possibly limiting the CPU frequency. By using an external
>>  MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
>>  2) A PHY does not necessarily need to be directly connected to
>>  magnetics. Indeed quite some boards use (RG)MII to connect to a switch
>>  IC or some other network circuitry, for instance fibre connectors.
>> 
>>  So I was wondering if we would need an explicit:
>>    allwinner,use-internal-phy;
>>  boolean DT property to signal the usage of the internal PHY?
>>  Alternatively we could go with the negative version:
>>    allwinner,disable-internal-phy;
>> 
>>  Or what about introducing a new "allwinner,internal-mii-phy" compatible
>>  string for the *PHY* node and use that?
>> 
>>  I just want to avoid that we introduce a binding that causes us
>>  headaches later. I think we can still fix this with a followup patch
>>  before the driver and its binding hit a release kernel.
>> 
>>  Cheers,
>>  Andre.
>> 
>> >>>
>> >>> I just see some patch, where "phy-mode = internal" is valid.
>> >>> I will try to find a way to use it
>> >>
>> >> Can you provide a link?
>> >
>> > https://lkml.org/lkml/2017/6/23/479
>> >
>> >>
>> >> I'm not a fan of using phy-mode for this. There's no guarantee what
>> >> mode the internal PHY uses. That's what phy-mode is for.
>>
>> I can understand Chen-Yu's concerns, but ...
>>
>> > For each soc the internal PHY mode is know and setted in 
>> > emac_variant/internal_phy
>> > So its not a problem.
>>
>> that is true as well, at least for now.
>>
>> So while I agree that having a separate property to indicate the usage
>> of the internal PHY would be nice, I am bit tempted to use this easier
>> approach and piggy back on the existing phy-mode property.
>
> We're trying to fix an issue that works for now too.
>
> If we want to consider future weird cases, then we must consider all
> of them. And the phy mode changing is definitely not really far
> fetched.

I guess the issue is whether it's likely that the vendor puts 2 internal
PHYs in one SoC, and they use different modes and can be switched around.
Otherwise it's fixed for a given SoC, and we can just handle that with
the per-SoC GMAC compatible.

Maybe Florian could tell us if this was one of the intended use cases
for the "internal" phy mode.

As for Rockchip, AFAIK they have 2 MACs, one is connected to the internal
PHY, while the other is connected to the external interface, and there is
no muxing involved, unlike Allwinner's solution.

> I agree with Chen-Yu, and I really feel like the compatible solution
> you suggested would cover both your concerns, and ours.

If using a PHY compatible is the solution, we could just use the
"ethernet-phy-id." style one, and put in the bogus ID that
Allwinner used.

Care must be taken to put this at the board level for boards using
the internal PHY, or we'd have to delete or override the property
in all other boards.

Ideally I think the internal PHY device node should _not_ be in
the SoC level .dtsi file. If we select the external interface, then
there's no connection to the internal PHY, and that device node becomes
unusable and bogus. This is something I think should be

RE: bug report: hairpin NAT doesn't work across bridges

2017-06-27 Thread Russell Stuart

I don't know how the unicode non-breaking spaces leaked into the
previous version.  Sorry about that.

Configuration
=

  A box running Debian stretch is acting as a NAT'ing router.
  It has a single Ethernet NIC and a wireless NIC servicing the local
  LAN.  These devices are bridged.  Since it has only one wired NIC
  it is used to connect to both the LAN and internet via a switch.
  This means it must do hairpin NAT over the wired NIC.

  internet <--> modem<--> switch <--> LAN
[10.99.99.97/30] ^[10.91.91.0/24]
 |^
  +--+   ||
  |  [10.91.91.1/24] eth0=<--/  v antenna LAN |
  |  [10.99.99.98/30] br0<---+   |  | [10.91.91.0/24] |
  | wlan0=<-/ v
  |  |+---=--+
  | ip r a default via 10.99.99.97   || eth-lan0 |
  | iptables -t nat -A POSTROUTING \ || 10.91.91.129/24  |
  |   -s 10.91.91.0/24 -j MASQUERADE ||  |
  +--+| ip r a default \ |
  |  via 10.91.91.1  |
  +--+

  While wlan0 is the reason the bridge exists in my case, it doesn't
  have to be a wireless connection.  Connecting any two Ethernet
  devices to the bridge (so it has to do some work) triggers the
  problem.


Problem
===

  10.91.91.129 can not receive packets from the internet.  A packet
  arriving from the internet hits eth0, then br0, then is mangled by
  iptables nat, and then is supposed to be sent out br0+eth0 again.
  The mangled version never makes it out of eth0.


Possible cause
==

  The bridge is implementing its "never send a packet out over the
  interface it arrived on" rule, but in this case it has misapplied the
  rule: the packet that is to be sent is not the same packet that
  arrived earlier on eth0. It has different source and destination IP
  addresses and MAC addresses, and in any case is not being reflected -
  it hit the INPUT chain, not the FORWARD chain.


Workarounds
===

  Set the "hairpin" flag on br0.  This works if there are no loops in
  the LAN wiring (which will normally be hidden by STP).  If there
  are, a packet storm will soon ensue, followed in my case by chaos
  and panic.

  An alternate workaround that mostly works is to use ebtables to
  make internet packets bypass the bridge:

ebtables -t broute -A BROUTING -d Multicast -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-dst 10.0.0.0/8 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-dst 172.16.0.0/12 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-dst 169.254.0.0/16 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-dst 192.168.0.0/16 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-src 10.0.0.0/8 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-src 172.16.0.0/12 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-src 169.254.0.0/16 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 --ip-src 192.168.0.0/16 -j ACCEPT
ebtables -t broute -A BROUTING -p IPv4 -j DROP
ebtables -t broute -A BROUTING -p IPv6 --ip6-dst fc00::/fc00:: -j ACCEPT
ebtables -t broute -A BROUTING -p IPv6 --ip6-src fc00::/fc00:: -j ACCEPT
ebtables -t broute -A BROUTING -p IPv6 -j DROP

  It only "mostly" works because it fails with OpenVPN.  OpenVPN gets
  TLS errors if the incoming packets don't go via the bridge.


Reproducing
===

  Run the shell script below.  The shell script sets up the
  configuration shown in the diagram above using debootstrap to
  create a minimal file system and containers created by
  systemd-nspawn.  debootstrap is a Debian utility, but is
  available on Fedora.

  Invoking it using "hairpin-bug.sh bridge" creates the conditions
  shown in the diagram and produces the following output on kernels that
  have the problem (spurious selinux warnings produced by systemd-nspawn
  have been omitted for clarity):

  PING 10.99.99.90 (10.99.99.90) 56(84) bytes of data.

  --- 10.99.99.90 ping statistics ---
  1 packets transmitted, 0 received, 100% packet loss, time 0ms

  The script doesn't need an internet connection to work as it
  "emulates" it.   10.99.99.90 is the one and only address on this
  emulated internet.

  Invoking it using "hairpin-bug.sh direct" creates the conditions
  shown in the diagram with one exception: the eth0 device is not
  connected to the br0, and IP addresses assigned to br0 have been
  moved to eth0.  The output in that case is:

  PING 10.99.99.90 (10.99.99.90) 56(84) bytes of data.
  64 bytes from 10.99.99.90: icmp_seq=1 ttl=63 time=0.080 ms

  --- 10.99.99.90 ping statistics ---
  1 packets transmitted, 1

Re: [PATCH net-next] netfilter: conntrack: add a new NF_CT_EXT_EXPAND extension

2017-06-27 Thread kbuild test robot

Hi Lin,

[auto build test ERROR on net-next/master]

url:
https://github.com/0day-ci/linux/commits/Lin-Zhang/netfilter-conntrack-add-a-new-NF_CT_EXT_EXPAND-extension/20170627-000844
config: i386-allyesconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   In function 'total_extension_size',
   inlined from 'nf_conntrack_init_start' at 
net//netfilter/nf_conntrack_core.c:1893:25:
>> net//netfilter/nf_conntrack_core.c:1859:171: error: call to 
>> '__compiletime_assert_1859' declared with attribute error: BUILD_BUG_ON 
>> failed: NF_CT_EXT_NUM > 9
 BUILD_BUG_ON(NF_CT_EXT_NUM > 9);


  ^  

vim +/__compiletime_assert_1859 +1859 net//netfilter/nf_conntrack_core.c

fae718dda Patrick McHardy2007-12-24  1853  module_param_call(hashsize, 
nf_conntrack_set_hashsize, param_get_uint,
9fb9cbb10 Yasuyuki Kozakai   2005-11-09  1854 
&nf_conntrack_htable_size, 0600);
9fb9cbb10 Yasuyuki Kozakai   2005-11-09  1855  
ab71632c4 Geert Uytterhoeven 2017-05-03  1856  static __always_inline 
unsigned int total_extension_size(void)
b3a5db109 Florian Westphal   2017-04-16  1857  {
b3a5db109 Florian Westphal   2017-04-16  1858   /* remember to add new 
extensions below */
b3a5db109 Florian Westphal   2017-04-16 @1859   
BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
b3a5db109 Florian Westphal   2017-04-16  1860  
b3a5db109 Florian Westphal   2017-04-16  1861   return sizeof(struct 
nf_ct_ext) +
b3a5db109 Florian Westphal   2017-04-16  1862  sizeof(struct 
nf_conn_help)
b3a5db109 Florian Westphal   2017-04-16  1863  #if IS_ENABLED(CONFIG_NF_NAT)
b3a5db109 Florian Westphal   2017-04-16  1864   + sizeof(struct 
nf_conn_nat)
b3a5db109 Florian Westphal   2017-04-16  1865  #endif
b3a5db109 Florian Westphal   2017-04-16  1866   + sizeof(struct 
nf_conn_seqadj)
b3a5db109 Florian Westphal   2017-04-16  1867   + sizeof(struct 
nf_conn_acct)
b3a5db109 Florian Westphal   2017-04-16  1868  #ifdef 
CONFIG_NF_CONNTRACK_EVENTS
b3a5db109 Florian Westphal   2017-04-16  1869   + sizeof(struct 
nf_conntrack_ecache)
b3a5db109 Florian Westphal   2017-04-16  1870  #endif
b3a5db109 Florian Westphal   2017-04-16  1871  #ifdef 
CONFIG_NF_CONNTRACK_TIMESTAMP
b3a5db109 Florian Westphal   2017-04-16  1872   + sizeof(struct 
nf_conn_tstamp)
b3a5db109 Florian Westphal   2017-04-16  1873  #endif
b3a5db109 Florian Westphal   2017-04-16  1874  #ifdef 
CONFIG_NF_CONNTRACK_TIMEOUT
b3a5db109 Florian Westphal   2017-04-16  1875   + sizeof(struct 
nf_conn_timeout)
b3a5db109 Florian Westphal   2017-04-16  1876  #endif
b3a5db109 Florian Westphal   2017-04-16  1877  #ifdef 
CONFIG_NF_CONNTRACK_LABELS
b3a5db109 Florian Westphal   2017-04-16  1878   + sizeof(struct 
nf_conn_labels)
b3a5db109 Florian Westphal   2017-04-16  1879  #endif
b3a5db109 Florian Westphal   2017-04-16  1880  #if 
IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
b3a5db109 Florian Westphal   2017-04-16  1881   + sizeof(struct 
nf_conn_synproxy)
b3a5db109 Florian Westphal   2017-04-16  1882  #endif
b3a5db109 Florian Westphal   2017-04-16  1883   ;
b3a5db109 Florian Westphal   2017-04-16  1884  };
b3a5db109 Florian Westphal   2017-04-16  1885  
f94161c1b Gao feng   2013-01-21  1886  int 
nf_conntrack_init_start(void)
9fb9cbb10 Yasuyuki Kozakai   2005-11-09  1887  {
f205c5e0c Patrick McHardy2007-07-07  1888   int max_factor = 8;
0c5366b3a Florian Westphal   2016-05-09  1889   int ret = -ENOMEM;
cc41c84b7 Florian Westphal   2017-04-14  1890   int i;
93bb0ceb7 Jesper Dangaard Brouer 2014-03-03  1891  
b3a5db109 Florian Westphal   2017-04-16  1892   /* struct nf_ct_ext 
uses u8 to store offsets/size */
b3a5db109 Florian Westphal   2017-04-16 @1893   
BUILD_BUG_ON(total_extension_size() > 255u);
b3a5db109 Florian Westphal   2017-04-16  1894  
a3efd8120 Florian Westphal   2016-04-18  1895   
seqcount_init(&nf_conntrack_generation);
a3efd8120 Florian Westphal   2016-04-18  1896  

:: The code at line 1859 was first introduced by commit
:: b3a5db109e0670d6d168e9cd9de4d272a68f7c35 netfilter: conntrack: use u8 
for extension sizes again

:: TO: Florian Westphal <f...@strlen.de>
:: CC: Pablo Neira Ayuso <pa...@netfilter.org>

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip

[PATCH] net/mlx4: fix spelling mistake: "enforcment" -> "enforcement"

2017-06-27 Thread Colin King

From: Colin Ian King 

Trivial fix to spelling mistake in mlx4_dbg debug message

Signed-off-by: Colin Ian King 
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c 
b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c1af47e45d3f..9e4c142c7ecd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int 
port, int vf, int link_stat
 
if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
mlx4_dbg(dev,
-"updating vf %d port %d no link state HW enforcment\n",
+"updating vf %d port %d no link state HW 
enforcement\n",
 vf, port);
return 0;
 }
-- 
2.11.0

[PATCH] net: atl1c: fix spelling mistake: "droppted" -> "dropped"

2017-06-27 Thread Colin King

From: Colin Ian King 

Trivial fix to spelling mistake in netif_info message

Signed-off-by: Colin Ian King 
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c 
b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 7e913d8331c3..8c9986f3fc01 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2252,7 +2252,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
 
if (atl1c_tx_map(adapter, skb, tpd, type) < 0) {
netif_info(adapter, tx_done, adapter->netdev,
-  "tx-skb droppted due to dma error\n");
+  "tx-skb dropped due to dma error\n");
/* roll back tpd/buffer */
atl1c_tx_rollback(adapter, tpd, type);
dev_kfree_skb_any(skb);
-- 
2.11.0

Re: Bluetooth: might sleep error in hidp_session_thread

2017-06-27 Thread jeffy


Hi Rohit,

On 06/24/2017 02:00 AM, Rohit Vaswani wrote:

I don't have a way to reply back to the older message; but you can use my 
tested-by for the below patch and re-send:


ok, i've resent it, thanks for your test by~

and for replying to an old message, i've figured it out when i tried to 
do that recently:


1/ download the mbox file from the patchwork 
link(https://patchwork.kernel.org/patch/***)

2/ rename it to .mbox
3/ import into Thunderbird (using ImportExportTools)

hope that helps ;)


For patch: [v4,3/3] Bluetooth: hidp: fix possible might sleep error in 
hidp_session_thread

Tested-by: Rohit Vaswani 

-Rohit

-Original Message-
From: jeffy [mailto:jeffy.c...@rock-chips.com]
Sent: Friday, June 23, 2017 05:39
To: Rohit Vaswani; linux-blueto...@vger.kernel.org
Cc: Brian Norris; Douglas Anderson; Johan Hedberg; Peter Hurley; Johan Hedberg; 
netdev@vger.kernel.org; linux-ker...@vger.kernel.org; David S. Miller; Marcel 
Holtmann; Gustavo Padovan
Subject: Re: Bluetooth: might sleep error in hidp_session_thread

Hi Rohit,

Thanx for your reply, and sorry to the delay, somehow your mail was marked as 
spam by the mail server :(

On 06/13/2017 02:31 AM, Rohit Vaswani wrote:

Hi Jeffy,

I was looking into the patch from Jeffy Chen from February 14 2017  :
   [v4,3/3] Bluetooth: hidp: fix possible might sleep error in
hidp_session_thread: https://patchwork.kernel.org/patch/9570931/

We faced a similar issue and this patch seems to fix the problem in our 
preliminary test.
I am trying to check if there was a reason this wasn't merged earlier ?


hmm, i'm not sure why, but please feel free to add your test-by~


Thanks,
Rohit

nvpublic

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Maxime Ripard

On Tue, Jun 27, 2017 at 10:02:45AM +0100, Andre Przywara wrote:
> Hi,
> 
> (CC:ing some people from that Rockchip dmwac series)
> 
> On 27/06/17 09:21, Corentin Labbe wrote:
> > On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
> >> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
> >>  wrote:
> >>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>  On 31/05/17 08:18, Corentin Labbe wrote:
> > The dwmac-sun8i is a heavy hacked version of stmmac hardware by
> > allwinner.
> > In fact the only common part is the descriptor management and the first
> > register function.
> 
>  Hi,
> 
>  I know I am a bit late with this, but while adapting the U-Boot driver
>  to the new binding I was wondering about the internal PHY detection:
> 
> 
>  So here you seem to deduce the usage of the internal PHY by the PHY
>  interface specified in the DT (MII = internal, RGMII = external).
>  I think I raised this question before, but isn't it perfectly legal for
>  a board to use MII with an external PHY even on those SoCs that feature
>  an internal PHY?
>  On the first glance that does not make too much sense, but apart from
>  not being the correct binding to describe all of the SoCs features I see
>  two scenarios:
>  1) A board vendor might choose to not use the internal PHY because it
>  has bugs, lacks features (configurability) or has other issues. For
>  instance I have heard reports that the internal PHY makes the SoC go
>  rather hot, possibly limiting the CPU frequency. By using an external
>  MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
>  2) A PHY does not necessarily need to be directly connected to
>  magnetics. Indeed quite some boards use (RG)MII to connect to a switch
>  IC or some other network circuitry, for instance fibre connectors.
> 
>  So I was wondering if we would need an explicit:
>    allwinner,use-internal-phy;
>  boolean DT property to signal the usage of the internal PHY?
>  Alternatively we could go with the negative version:
>    allwinner,disable-internal-phy;
> 
>  Or what about introducing a new "allwinner,internal-mii-phy" compatible
>  string for the *PHY* node and use that?
> 
>  I just want to avoid that we introduce a binding that causes us
>  headaches later. I think we can still fix this with a followup patch
>  before the driver and its binding hit a release kernel.
> 
>  Cheers,
>  Andre.
> 
> >>>
> >>> I just see some patch, where "phy-mode = internal" is valid.
> >>> I will try to find a way to use it
> >>
> >> Can you provide a link?
> > 
> > https://lkml.org/lkml/2017/6/23/479
> > 
> >>
> >> I'm not a fan of using phy-mode for this. There's no guarantee what
> >> mode the internal PHY uses. That's what phy-mode is for.
> 
> I can understand Chen-Yu's concerns, but ...
> 
> > For each soc the internal PHY mode is know and setted in 
> > emac_variant/internal_phy
> > So its not a problem.
> 
> that is true as well, at least for now.
>
> So while I agree that having a separate property to indicate the usage
> of the internal PHY would be nice, I am bit tempted to use this easier
> approach and piggy back on the existing phy-mode property.

We're trying to fix an issue that works for now too.

If we want to consider future weird cases, then we must consider all
of them. And the phy mode changing is definitely not really far
fetched.

I agree with Chen-Yu, and I really feel like the compatible solution
you suggested would cover both your concerns, and ours.

Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com


signature.asc
Description: PGP signature

[RESEND PATCH v4 2/3] Bluetooth: cmtp: fix possible might sleep error in cmtp_session

2017-06-27 Thread Jeffy Chen

It looks like cmtp_session has same pattern as the issue reported in
old rfcomm:

while (1) {
set_current_state(TASK_INTERRUPTIBLE);
if (condition)
break;
// may call might_sleep here
schedule();
}
__set_current_state(TASK_RUNNING);

Which was fixed in:
dfb2fae Bluetooth: Fix nested sleeps

So let's fix it in the same way, also following the suggestion of:
https://lwn.net/Articles/628628/

Signed-off-by: Jeffy Chen 
Reviewed-by: Brian Norris 
Reviewed-by: AL Yu-Chen Cho 

---

Changes in v4: None
Changes in v2:
Remove unnecessary memory barrier before wake_up_* functions.

 net/bluetooth/cmtp/core.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 9e59b66..1152ce3 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,16 +280,16 @@ static int cmtp_session(void *arg)
struct cmtp_session *session = arg;
struct sock *sk = session->sock->sk;
struct sk_buff *skb;
-   wait_queue_t wait;
+   DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
BT_DBG("session %p", session);
 
set_user_nice(current, -15);
 
-   init_waitqueue_entry(, current);
add_wait_queue(sk_sleep(sk), );
while (1) {
-   set_current_state(TASK_INTERRUPTIBLE);
+   /* Ensure session->terminate is updated */
+   smp_mb__before_atomic();
 
if (atomic_read(>terminate))
break;
@@ -306,9 +306,8 @@ static int cmtp_session(void *arg)
 
cmtp_process_transmit(session);
 
-   schedule();
+   wait_woken(, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
-   __set_current_state(TASK_RUNNING);
remove_wait_queue(sk_sleep(sk), );
 
down_write(_session_sem);
@@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, 
struct socket *sock)
err = cmtp_attach_device(session);
if (err < 0) {
atomic_inc(>terminate);
-   wake_up_process(session->task);
+   wake_up_interruptible(sk_sleep(session->sock->sk));
up_write(_session_sem);
return err;
}
@@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
 
/* Stop session thread */
atomic_inc(>terminate);
-   wake_up_process(session->task);
+
+   /* Ensure session->terminate is updated */
+   smp_mb__after_atomic();
+
+   wake_up_interruptible(sk_sleep(session->sock->sk));
} else
err = -ENOENT;
 
-- 
2.1.4

[RESEND PATCH v4 3/3] Bluetooth: hidp: fix possible might sleep error in hidp_session_thread

2017-06-27 Thread Jeffy Chen

It looks like hidp_session_thread has same pattern as the issue reported in
old rfcomm:

while (1) {
set_current_state(TASK_INTERRUPTIBLE);
if (condition)
break;
// may call might_sleep here
schedule();
}
__set_current_state(TASK_RUNNING);

Which was fixed in:
dfb2fae Bluetooth: Fix nested sleeps

So let's fix it in the same way, also following the suggestion of:
https://lwn.net/Articles/628628/

Signed-off-by: Jeffy Chen 
Tested-by: AL Yu-Chen Cho 
Tested-by: Rohit Vaswani 

---

Changes in v4:
1/ Make hidp_session_wake_function static.
2/ Remove unnecessary default_wake_function.

Changes in v2:
1/ Fix could not wake up by wake attempts on original wait queues.
2/ Remove unnecessary memory barrier before wake_up_* functions.

 net/bluetooth/hidp/core.c | 33 ++---
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0bec458..1fc0764 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -36,6 +36,7 @@
 #define VERSION "1.2"
 
 static DECLARE_RWSEM(hidp_session_sem);
+static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq);
 static LIST_HEAD(hidp_session_list);
 
 static unsigned char hidp_keycode[256] = {
@@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session 
*session)
  * Wake up session thread and notify it to stop. This is asynchronous and
  * returns immediately. Call this whenever a runtime error occurs and you want
  * the session to stop.
- * Note: wake_up_process() performs any necessary memory-barriers for us.
+ * Note: wake_up_interruptible() performs any necessary memory-barriers for us.
  */
 static void hidp_session_terminate(struct hidp_session *session)
 {
atomic_inc(>terminate);
-   wake_up_process(session->task);
+   wake_up_interruptible(_session_wq);
 }
 
 /*
@@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session)
struct sock *ctrl_sk = session->ctrl_sock->sk;
struct sock *intr_sk = session->intr_sock->sk;
struct sk_buff *skb;
+   DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
+   add_wait_queue(_session_wq, );
for (;;) {
/*
 * This thread can be woken up two ways:
@@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session 
*session)
 *session->terminate flag and wakes this thread up.
 *  - Via modifying the socket state of ctrl/intr_sock. This
 *thread is woken up by ->sk_state_changed().
-*
-* Note: set_current_state() performs any necessary
-* memory-barriers for us.
 */
-   set_current_state(TASK_INTERRUPTIBLE);
 
+   /* Ensure session->terminate is updated */
+   smp_mb__before_atomic();
if (atomic_read(>terminate))
break;
 
@@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session 
*session)
hidp_process_transmit(session, >ctrl_transmit,
  session->ctrl_sock);
 
-   schedule();
+   wait_woken(, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
+   remove_wait_queue(_session_wq, );
 
atomic_inc(>terminate);
-   set_current_state(TASK_RUNNING);
+
+   /* Ensure session->terminate is updated */
+   smp_mb__after_atomic();
+}
+
+static int hidp_session_wake_function(wait_queue_t *wait,
+ unsigned int mode,
+ int sync, void *key)
+{
+   wake_up_interruptible(_session_wq);
+   return false;
 }
 
 /*
@@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session)
 static int hidp_session_thread(void *arg)
 {
struct hidp_session *session = arg;
-   wait_queue_t ctrl_wait, intr_wait;
+   DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function);
+   DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function);
 
BT_DBG("session %p", session);
 
@@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg)
set_user_nice(current, -15);
hidp_set_timer(session);
 
-   init_waitqueue_entry(_wait, current);
-   init_waitqueue_entry(_wait, current);
add_wait_queue(sk_sleep(session->ctrl_sock->sk), _wait);
add_wait_queue(sk_sleep(session->intr_sock->sk), _wait);
/* This memory barrier is paired with wq_has_sleeper(). See
-- 
2.1.4

[RESEND PATCH v4 1/3] Bluetooth: bnep: fix possible might sleep error in bnep_session

2017-06-27 Thread Jeffy Chen

It looks like bnep_session has same pattern as the issue reported in
old rfcomm:

while (1) {
set_current_state(TASK_INTERRUPTIBLE);
if (condition)
break;
// may call might_sleep here
schedule();
}
__set_current_state(TASK_RUNNING);

Which was fixed in:
dfb2fae Bluetooth: Fix nested sleeps

So let's fix it in the same way, also following the suggestion of:
https://lwn.net/Articles/628628/

Signed-off-by: Jeffy Chen 
Reviewed-by: Brian Norris 
Reviewed-by: AL Yu-Chen Cho 
---

Changes in v4: None
Changes in v2: None

 net/bluetooth/bnep/core.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index fbf251f..4d6b94d 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -484,16 +484,16 @@ static int bnep_session(void *arg)
struct net_device *dev = s->dev;
struct sock *sk = s->sock->sk;
struct sk_buff *skb;
-   wait_queue_t wait;
+   DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
BT_DBG("");
 
set_user_nice(current, -15);
 
-   init_waitqueue_entry(, current);
add_wait_queue(sk_sleep(sk), );
while (1) {
-   set_current_state(TASK_INTERRUPTIBLE);
+   /* Ensure session->terminate is updated */
+   smp_mb__before_atomic();
 
if (atomic_read(>terminate))
break;
@@ -515,9 +515,8 @@ static int bnep_session(void *arg)
break;
netif_wake_queue(dev);
 
-   schedule();
+   wait_woken(, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
-   __set_current_state(TASK_RUNNING);
remove_wait_queue(sk_sleep(sk), );
 
/* Cleanup session */
@@ -666,7 +665,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
s = __bnep_get_session(req->dst);
if (s) {
atomic_inc(>terminate);
-   wake_up_process(s->task);
+   wake_up_interruptible(sk_sleep(s->sock->sk));
} else
err = -ENOENT;
 
-- 
2.1.4

[PATCH 2/6] arm: sun8i: orangepipc: use internal phy-mode

2017-06-27 Thread Corentin Labbe

Since the PHY used is internal, simply set phy-mode as internal.

Signed-off-by: Corentin Labbe 
---
 arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts 
b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
index f5f0f15a2088..94edeb889e55 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
@@ -120,7 +120,7 @@
 
  {
phy-handle = <_mii_phy>;
-   phy-mode = "mii";
+   phy-mode = "internal";
allwinner,leds-active-low;
status = "okay";
 };
-- 
2.13.0

[PATCH 1/6] net: stmmac: support future possible different internal phy mode

2017-06-27 Thread Corentin Labbe

The current way to find out whether the PHY is internal is to compare the
DT phy-mode with emac_variant/internal_phy.
But this rules out a possible future SoC where an external PHY uses the
same PHY mode as the internal one.

By using phy-mode = "internal" we make it possible to have an external PHY
with the same mode as the internal one.

Reported-by: André Przywara 
Signed-off-by: Corentin Labbe 
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index fffd6d5fc907..6c2d1da05588 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -638,7 +638,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 {
struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
struct device_node *node = priv->device->of_node;
-   int ret;
+   int ret, phy_interface;
u32 reg, val;
 
regmap_read(gmac->regmap, SYSCON_EMAC_REG, );
@@ -718,7 +718,11 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
if (gmac->variant->support_rmii)
reg &= ~SYSCON_RMII_EN;
 
-   switch (priv->plat->interface) {
+   phy_interface = priv->plat->interface;
+   /* if PHY is internal, select the mode (xMII) used by the SoC */
+   if (gmac->use_internal_phy)
+   phy_interface = gmac->variant->internal_phy;
+   switch (phy_interface) {
case PHY_INTERFACE_MODE_MII:
/* default */
break;
@@ -932,7 +936,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
}
 
plat_dat->interface = of_get_phy_mode(dev->of_node);
-   if (plat_dat->interface == gmac->variant->internal_phy) {
+   if (plat_dat->interface == PHY_INTERFACE_MODE_INTERNAL) {
dev_info(>dev, "Will use internal PHY\n");
gmac->use_internal_phy = true;
gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);
-- 
2.13.0

[PATCH 4/6] arm: sun8i: orangepi-one: use internal phy-mode

2017-06-27 Thread Corentin Labbe

Since the PHY used is internal, simply set phy-mode as internal.

Signed-off-by: Corentin Labbe 
---
 arch/arm/boot/dts/sun8i-h3-orangepi-one.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts 
b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
index 6880268e8b87..27e7ef4e42f2 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -100,7 +100,7 @@
 
  {
phy-handle = <_mii_phy>;
-   phy-mode = "mii";
+   phy-mode = "internal";
allwinner,leds-active-low;
status = "okay";
 };
-- 
2.13.0

[PATCH 6/6] arm: sun8i: orangepi-2: use internal phy-mode

2017-06-27 Thread Corentin Labbe

Since the PHY used is internal, simply set phy-mode as internal.

Signed-off-by: Corentin Labbe 
---
 arch/arm/boot/dts/sun8i-h3-orangepi-2.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts 
b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
index 17cdeae19c6f..a2a2b11dfeed 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
@@ -120,7 +120,7 @@
 
  {
phy-handle = <_mii_phy>;
-   phy-mode = "mii";
+   phy-mode = "internal";
allwinner,leds-active-low;
status = "okay";
 };
-- 
2.13.0

[PATCH 3/6] arm: sun8i: orangepi-zero: use internal phy-mode

2017-06-27 Thread Corentin Labbe

Since the PHY used is internal, simply set phy-mode as internal.

Signed-off-by: Corentin Labbe 
---
 arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts 
b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
index 6713d0f2b3f4..7c154b845baa 100644
--- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
@@ -106,7 +106,7 @@
 
  {
phy-handle = <_mii_phy>;
-   phy-mode = "mii";
+   phy-mode = "internal";
allwinner,leds-active-low;
status = "okay";
 };
-- 
2.13.0

[PATCH 5/6] arm: sun8i: nanopi-neo: use internal phy-mode

2017-06-27 Thread Corentin Labbe

Since the PHY used is internal, simply set phy-mode as internal.

Signed-off-by: Corentin Labbe 
---
 arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts 
b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
index 78f6c24952dd..5c5ba806e2f1 100644
--- a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
+++ b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
@@ -49,7 +49,7 @@
 
  {
phy-handle = <_mii_phy>;
-   phy-mode = "mii";
+   phy-mode = "internal";
allwinner,leds-active-low;
status = "okay";
 };
-- 
2.13.0

Re: [PATCH iproute2 3/5] rdma: Add device capability parsing

2017-06-27 Thread Leon Romanovsky

On Tue, Jun 27, 2017 at 07:06:04AM +0300, Leon Romanovsky wrote:
> On Mon, Jun 26, 2017 at 02:36:10PM -0600, Jason Gunthorpe wrote:
> > On Mon, Jun 26, 2017 at 10:21:03PM +0300, Leon Romanovsky wrote:
> > > On Mon, Jun 26, 2017 at 12:29:24PM -0600, Jason Gunthorpe wrote:
> > > > On Mon, Jun 26, 2017 at 09:21:26PM +0300, Leon Romanovsky wrote:
> > > > > From: Leon Romanovsky 
> > > > >
> > > > > Add parsing interface for the device capability flags
> > > > >
> > > > > $ rdma dev show
> > > > > 1: mlx5_0: caps 0x1257e1c26
> > > >
> > > > This seems very un ip-like. I wouldn't show an undecoded hex value
> > > > like that, it isn't really useful.
> > >
> > > It is the first supported field; after new fields are added, we will
> > > have an interface very similar to ip.
> > >
> > > 1: mlx5_0: caps 0x1257e1c2 key_1 val_1 key_2 val_2 
> > >
> > > The values are presented as is, so they can be used as input for different
> > > scripts.
> >
> > I still wouldn't show an undecoded hex value.. It isn't useful.
> >
> > > > > $ rdma dev show mlx5_4 caps
> > > > > 5: mlx5_4: caps 0x1257e1c26
> > > > > Bit   Description
> > > > >  01   DEVICE_BAD_PKEY_CNTR
> > > > >  02   DEVICE_BAD_QKEY_CNTR
> > > >
> > > > This table also seems un ip-like, the usual format is a list of words,
> > > > I think.
> > >
> > > It is true for key<->value data, but it is less obvious for bit
> > > parsing.
> >
> > Several of the word decodes are from bit fields..
> >
> > > Internally, I tried to present them as list and it was ugly like hell
> > > without any chance (without extra parsing) to actual see if specific
> > > capability is present or no.
> >
> > lspci seems to have no problem being readable while doing this..
>
> No problem,
> If i understand you correctly, you are suggesting to drop parsing of
> "caps" as a separate command and embed it into general show .
>
> Can you help me and give an example of how would you present those caps?
> What will be the output of such command?
>  $ rdma dev show mlx5_4

ip-like style:

$ rdma dev show mlx5_4
5: mlx5_4:
caps: 
$ rdma link show mlx5_3
4/1: mlx5_3/1:
caps: 

Thanks

>
>  Thanks
>
> >
> > Jason




signature.asc
Description: PGP signature

[PATCH] datapath: Avoid using stack larger than 1024.

2017-06-27 Thread Tonghao Zhang

When compiling OvS-master on 4.4.0-81 kernel,
there is a warning:

CC [M]  /root/ovs/datapath/linux/datapath.o
/root/ovs/datapath/linux/datapath.c: In function
‘ovs_flow_cmd_set’:
/root/ovs/datapath/linux/datapath.c:1221:1: warning:
the frame size of 1040 bytes is larger than 1024 bytes
[-Wframe-larger-than=]

This patch uses kmalloc to allocate memory for sw_flow_mask and
avoids using the stack.

Signed-off-by: Tonghao Zhang 
---
 net/openvswitch/datapath.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index c85029c..da8cd68 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1107,7 +1107,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct 
genl_info *info)
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
struct sw_flow *flow;
-   struct sw_flow_mask mask;
+   struct sw_flow_mask *mask;
struct sk_buff *reply = NULL;
struct datapath *dp;
struct sw_flow_actions *old_acts = NULL, *acts = NULL;
@@ -1120,7 +1120,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct 
genl_info *info)
 
ufid_present = ovs_nla_get_ufid(, a[OVS_FLOW_ATTR_UFID], log);
if (a[OVS_FLOW_ATTR_KEY]) {
-   ovs_match_init(, , true, );
+   mask = kmalloc(sizeof(struct sw_flow_mask), GFP_KERNEL);
+   if (!mask)
+   return -ENOMEM;
+
+   ovs_match_init(, , true, mask);
error = ovs_nla_get_match(net, , a[OVS_FLOW_ATTR_KEY],
  a[OVS_FLOW_ATTR_MASK], log);
} else if (!ufid_present) {
@@ -1141,7 +1145,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct 
genl_info *info)
}
 
acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], ,
-   , log);
+   mask, log);
if (IS_ERR(acts)) {
error = PTR_ERR(acts);
goto error;
@@ -1216,6 +1220,7 @@ err_unlock_ovs:
 err_kfree_acts:
ovs_nla_free_flow_actions(acts);
 error:
+   kfree(mask);
return error;
 }
 
-- 
1.8.3.1

Re: [net-next v2 6/6] ixgbe: Add malicious driver detection support

2017-06-27 Thread Or Gerlitz

On Tue, Jun 27, 2017 at 11:51 AM, Jeff Kirsher
 wrote:
> From: Paul Greenwalt 
>
> Add malicious driver detection (MDD) support for X550, X550em_a,
> and X550em_x devices.
>
> MDD is a hardware SR-IOV security feature which the driver enables by
> default, but can be controlled on|off by ethtool set-priv-flags

wait, we have the trusted vf concept, which you implement
(ixgbe_ndo_set_vf_trust)
so you can enable by default for all vfs and disable it for trusted
ones, why create
an ixgbe special config knob? IMHO we should make every possible effort to avoid
priv ethtool flags usage.

Or.

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Andre Przywara

Hi,

(CC:ing some people from that Rockchip dwmac series)

On 27/06/17 09:21, Corentin Labbe wrote:
> On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
>> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
>>  wrote:
>>> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
 On 31/05/17 08:18, Corentin Labbe wrote:
> The dwmac-sun8i is a heavy hacked version of stmmac hardware by
> allwinner.
> In fact the only common part is the descriptor management and the first
> register function.

 Hi,

 I know I am a bit late with this, but while adapting the U-Boot driver
 to the new binding I was wondering about the internal PHY detection:

 So here you seem to deduce the usage of the internal PHY by the PHY
 interface specified in the DT (MII = internal, RGMII = external).
 I think I raised this question before, but isn't it perfectly legal for
 a board to use MII with an external PHY even on those SoCs that feature
 an internal PHY?
 On the first glance that does not make too much sense, but apart from
 not being the correct binding to describe all of the SoCs features I see
 two scenarios:
 1) A board vendor might choose to not use the internal PHY because it
 has bugs, lacks features (configurability) or has other issues. For
 instance I have heard reports that the internal PHY makes the SoC go
 rather hot, possibly limiting the CPU frequency. By using an external
 MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
 2) A PHY does not necessarily need to be directly connected to
 magnetics. Indeed quite some boards use (RG)MII to connect to a switch
 IC or some other network circuitry, for instance fibre connectors.

 So I was wondering if we would need an explicit:
   allwinner,use-internal-phy;
 boolean DT property to signal the usage of the internal PHY?
 Alternatively we could go with the negative version:
   allwinner,disable-internal-phy;

 Or what about introducing a new "allwinner,internal-mii-phy" compatible
 string for the *PHY* node and use that?

 I just want to avoid that we introduce a binding that causes us
 headaches later. I think we can still fix this with a followup patch
 before the driver and its binding hit a release kernel.

 Cheers,
 Andre.

>>>
>>> I just see some patch, where "phy-mode = internal" is valid.
>>> I will try to find a way to use it
>>
>> Can you provide a link?
> 
> https://lkml.org/lkml/2017/6/23/479
> 
>>
>> I'm not a fan of using phy-mode for this. There's no guarantee what
>> mode the internal PHY uses. That's what phy-mode is for.

I can understand Chen-Yu's concerns, but ...

> For each SoC the internal PHY mode is known and set in
> emac_variant/internal_phy
> So it's not a problem.

that is true as well, at least for now.

So while I agree that having a separate property to indicate the usage
of the internal PHY would be nice, I am a bit tempted to use this easier
approach and piggy back on the existing phy-mode property.

Are there any insights from the people involved with the Rockchip
internal PHY?
Is it worth introducing a generic boolean property for an internal PHY?
Or shall we actually move this more into the PHY code, introducing new
compatibles for the internal Allwinner and Rockchip Ethernet PHYs?

Cheers,
Andre.

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Corentin Labbe

On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
> On 31/05/17 08:18, Corentin Labbe wrote:
> > The dwmac-sun8i is a heavy hacked version of stmmac hardware by
> > allwinner.
> > In fact the only common part is the descriptor management and the first
> > register function.
> 
> Hi,
> 
> I know I am a bit late with this, but while adapting the U-Boot driver
> to the new binding I was wondering about the internal PHY detection:
> 
> 
> So here you seem to deduce the usage of the internal PHY by the PHY
> interface specified in the DT (MII = internal, RGMII = external).
> I think I raised this question before, but isn't it perfectly legal for
> a board to use MII with an external PHY even on those SoCs that feature
> an internal PHY?
> On the first glance that does not make too much sense, but apart from
> not being the correct binding to describe all of the SoCs features I see
> two scenarios:
> 1) A board vendor might choose to not use the internal PHY because it
> has bugs, lacks features (configurability) or has other issues. For
> instance I have heard reports that the internal PHY makes the SoC go
> rather hot, possibly limiting the CPU frequency. By using an external
> MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
> 2) A PHY does not necessarily need to be directly connected to
> magnetics. Indeed quite some boards use (RG)MII to connect to a switch
> IC or some other network circuitry, for instance fibre connectors.
> 
> So I was wondering if we would need an explicit:
>   allwinner,use-internal-phy;
> boolean DT property to signal the usage of the internal PHY?
> Alternatively we could go with the negative version:
>   allwinner,disable-internal-phy;
> 
> Or what about introducing a new "allwinner,internal-mii-phy" compatible
> string for the *PHY* node and use that?
> 
> I just want to avoid that we introduce a binding that causes us
> headaches later. I think we can still fix this with a followup patch
> before the driver and its binding hit a release kernel.
> 
> Cheers,
> Andre.
> 

I just saw a patch where "phy-mode = internal" is valid.
I will try to find a way to use it.

Regards

RE: [PATCH] net: usb: asix88179_178a: Add support for the Belkin B2B128

2017-06-27 Thread David Laight

From: Andrew F. Davis
> Sent: 26 June 2017 18:41
> The Belkin B2B128 is a USB 3.0 Hub + Gigabit Ethernet Adapter, the
> Ethernet adapter uses the ASIX AX88179 USB 3.0 to Gigabit Ethernet
> chip supported by this driver, add the USB ID for the same.
...

I've just had a look at the current version of ax88179_178a.c.
It still makes me pull my hair out

Not the least of the problems is that it lies about skb->truesize.
All the receive skb are longer than 16k - so will be 64k, but
it sets skb->truesize based on the actual receive frame size.

A lot of the code is also 'over complicated' - making it slower
than strictly necessary.

There is also the more general problem that usbnet is horribly
inefficient for anything trying to run at Ge speeds (never mind
anything faster).

David

[net-next v2 0/6][pull request] 10GbE Intel Wired LAN Driver Updates 2017-06-27

2017-06-27 Thread Jeff Kirsher

This series contains updates to ixgbe only.

Tony provides majority of the changes, starting with adding a check to
ensure that adding a MAC filter was successful, before setting the
MACVLAN.  In order to receive notifications of link configurations of the
external PHY and support the configuration of the internal iXFI link on
X552 devices, Tony enables LASI interrupts.  Update the iXFI driver code
flow, since the MAC register NW_MNG_IF_SEL fields have been redefined for
X553 devices, so add MAC checks for iXFI flows.  Added additional checks
for flow control autonegotiation, since it is not supported for X553 fiber
 and XFI devices.

Paul adds malicious driver detection (MDD) support for X550* devices.  MDD
is a hardware SR-IOV security feature which the driver enables by default,
and can be controlled by ethtool set-priv-flags parameter.

v2: removed unnecessary parens noticed by David Miller in patch 6 of the
series.

The following are changes since commit 593814d1beae8ad91be6c90f95764e09fc7ca236:
  net/mlx4: fix spelling mistake: "coalesing" -> "coalescing"
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 10GbE

Paul Greenwalt (1):
  ixgbe: Add malicious driver detection support

Tony Nguyen (5):
  ixgbe: Ensure MAC filter was added before setting MACVLAN
  ixgbe: Enable LASI interrupts for X552 devices
  ixgbe: Update NW_MNG_IF_SEL support for X553
  ixgbe: Do not support flow control autonegotiation for X553
  ixgbe: Disable flow control for XFI

 drivers/net/ethernet/intel/ixgbe/ixgbe.h |   3 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c  |  30 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c  |  25 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |  13 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c|   8 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c   |  66 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h|  12 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c| 240 +++
 8 files changed, 340 insertions(+), 57 deletions(-)

-- 
2.12.2

[net-next v2 5/6] ixgbe: Disable flow control for XFI

2017-06-27 Thread Jeff Kirsher

From: Tony Nguyen 

Flow control autonegotiation is not supported for XFI.  Make sure that
ixgbe_device_supports_autoneg_fc() returns false and
hw->fc.disable_fc_autoneg is set to true to avoid running the fc_autoneg
function for that device.

Signed-off-by: Tony Nguyen 
Signed-off-by: Emil Tantilov 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |  5 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c   | 57 ++---
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 40ae7db468ea..2c19070d2a0b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -97,7 +97,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
 
break;
case ixgbe_media_type_backplane:
-   supported = true;
+   if (hw->device_id == IXGBE_DEV_ID_X550EM_X_XFI)
+   supported = false;
+   else
+   supported = true;
break;
case ixgbe_media_type_copper:
/* only some copper devices support flow control autoneg */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 95adbda36235..19fbb2f28ea4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -2843,7 +2843,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
 {
bool pause, asm_dir;
u32 reg_val;
-   s32 rc;
+   s32 rc = 0;
 
/* Validate the requested mode */
if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
@@ -2886,32 +2886,37 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
return IXGBE_ERR_CONFIG;
}
 
-   if (hw->device_id != IXGBE_DEV_ID_X550EM_X_KR &&
-   hw->device_id != IXGBE_DEV_ID_X550EM_A_KR &&
-   hw->device_id != IXGBE_DEV_ID_X550EM_A_KR_L)
-   return 0;
-
-   rc = hw->mac.ops.read_iosf_sb_reg(hw,
- IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
- IXGBE_SB_IOSF_TARGET_KR_PHY,
- _val);
-   if (rc)
-   return rc;
-
-   reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
-IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
-   if (pause)
-   reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
-   if (asm_dir)
-   reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
-   rc = hw->mac.ops.write_iosf_sb_reg(hw,
-  IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
-  IXGBE_SB_IOSF_TARGET_KR_PHY,
-  reg_val);
-
-   /* This device does not fully support AN. */
-   hw->fc.disable_fc_autoneg = true;
+   switch (hw->device_id) {
+   case IXGBE_DEV_ID_X550EM_X_KR:
+   case IXGBE_DEV_ID_X550EM_A_KR:
+   case IXGBE_DEV_ID_X550EM_A_KR_L:
+   rc = hw->mac.ops.read_iosf_sb_reg(hw,
+   IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+   IXGBE_SB_IOSF_TARGET_KR_PHY,
+   _val);
+   if (rc)
+   return rc;
 
+   reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+   if (pause)
+   reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+   if (asm_dir)
+   reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+   rc = hw->mac.ops.write_iosf_sb_reg(hw,
+   IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+   IXGBE_SB_IOSF_TARGET_KR_PHY,
+   reg_val);
+
+   /* This device does not fully support AN. */
+   hw->fc.disable_fc_autoneg = true;
+   break;
+   case IXGBE_DEV_ID_X550EM_X_XFI:
+   hw->fc.disable_fc_autoneg = true;
+   break;
+   default:
+   break;
+   }
return rc;
 }
 
-- 
2.12.2

[net-next v2 4/6] ixgbe: Do not support flow control autonegotiation for X553

2017-06-27 Thread Jeff Kirsher

From: Tony Nguyen 

Flow control autonegotiation is not supported for fiber on X553.  Add
device ID checks in ixgbe_device_supports_autoneg_fc() to return the
appropriate value.

Signed-off-by: Tony Nguyen 
Signed-off-by: Emil Tantilov 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 4e35e7017f3d..40ae7db468ea 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -79,13 +79,22 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
 
switch (hw->phy.media_type) {
case ixgbe_media_type_fiber:
-   hw->mac.ops.check_link(hw, , _up, false);
-   /* if link is down, assume supported */
-   if (link_up)
-   supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
+   /* flow control autoneg black list */
+   switch (hw->device_id) {
+   case IXGBE_DEV_ID_X550EM_A_SFP:
+   case IXGBE_DEV_ID_X550EM_A_SFP_N:
+   supported = false;
+   break;
+   default:
+   hw->mac.ops.check_link(hw, , _up, false);
+   /* if link is down, assume supported */
+   if (link_up)
+   supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
true : false;
-   else
-   supported = true;
+   else
+   supported = true;
+   }
+
break;
case ixgbe_media_type_backplane:
supported = true;
@@ -111,6 +120,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
break;
}
 
+   if (!supported)
+   hw_dbg(hw, "Device %x does not support flow control autoneg\n",
+  hw->device_id);
+
return supported;
 }
 
-- 
2.12.2

[net-next v2 6/6] ixgbe: Add malicious driver detection support

2017-06-27 Thread Jeff Kirsher

From: Paul Greenwalt 

Add malicious driver detection (MDD) support for X550, X550em_a,
and X550em_x devices.

MDD is a hardware SR-IOV security feature which the driver enables by
default, but which can be turned on|off via the ethtool set-priv-flags
parameter. When enabled, MDD disables a VF driver's transmit queue
when a malformed descriptor is detected. The PF will log the event
and re-enable the VF queue.

Signed-off-by: Paul Greenwalt 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h |   3 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c  |  25 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |  13 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c|   6 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c   |  50 
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h|   8 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c| 138 +++
 7 files changed, 241 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index dd5578756ae0..2e9df66f6e18 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -563,6 +563,8 @@ struct ixgbe_mac_addr {
 #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
 #define IXGBE_SFP_POLL_JIFFIES (2 * HZ)/* SFP poll every 2 seconds */
 
+#define IXGBE_MDD_Q_BITMAP_DEPTH 2
+
 /* board specific private data structure */
 struct ixgbe_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
@@ -603,6 +605,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26)
 #define IXGBE_FLAG_DCB_CAPABLE BIT(27)
 #define IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE  BIT(28)
+#define IXGBE_FLAG_MDD_ENABLED BIT(29)
 
u32 flags2;
 #define IXGBE_FLAG2_RSC_CAPABLEBIT(0)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index 78c52375acc6..53f260dbfb5f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -379,10 +379,22 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
} else {
hw->mac.ops.fc_enable(hw);
}
+   /* Disable MDD before updating SRRCTL, because modifying the
+* SRRCTL register while the queue is enabled will generate an
+* MDD event.
+*/
+   if (adapter->num_vfs && hw->mac.ops.disable_mdd &&
+   (adapter->flags & IXGBE_FLAG_MDD_ENABLED))
+   hw->mac.ops.disable_mdd(hw);
 
ixgbe_set_rx_drop_en(adapter);
 
-   ret = DCB_HW_CHG;
+   if (adapter->num_vfs && hw->mac.ops.enable_mdd &&
+   (adapter->flags & IXGBE_FLAG_MDD_ENABLED))
+   hw->mac.ops.enable_mdd(hw);
+
+   if (ret != DCB_HW_CHG_RST)
+   ret = DCB_HW_CHG;
}
 
 #ifdef IXGBE_FCOE
@@ -634,8 +646,19 @@ static int ixgbe_dcbnl_ieee_setpfc(struct net_device *dev,
else
err = hw->mac.ops.fc_enable(hw);
 
+   /* Disable MDD before updating SRRCTL, because modifying the SRRCTL
+* register while the queue is enabled will generate an MDD event.
+*/
+   if (adapter->num_vfs && hw->mac.ops.disable_mdd &&
+   (adapter->flags & IXGBE_FLAG_MDD_ENABLED))
+   hw->mac.ops.disable_mdd(hw);
+
ixgbe_set_rx_drop_en(adapter);
 
+   if (adapter->num_vfs && hw->mac.ops.enable_mdd &&
+   (adapter->flags & IXGBE_FLAG_MDD_ENABLED))
+   hw->mac.ops.enable_mdd(hw);
+
return err;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 72c565712a5f..e10a4d6d5391 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -157,6 +157,8 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = {
 #define IXGBE_PRIV_FLAGS_LEGACY_RX BIT(0)
"legacy-rx",
+#define IXGBE_PRIV_FLAG_MDD_ENABLEDBIT(1)
+   "mdd",
 };
 
 #define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings)
@@ -3420,6 +3422,9 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev)
struct ixgbe_adapter *adapter = netdev_priv(netdev);
u32 priv_flags = 0;
 
+   if (adapter->flags & IXGBE_FLAG_MDD_ENABLED)
+   priv_flags |= IXGBE_PRIV_FLAG_MDD_ENABLED;
+
if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX;
 
@@ -3430,13 +3435,19 @@ static int ixgbe_set_priv_flags(struct

[net-next v2 1/6] ixgbe: Ensure MAC filter was added before setting MACVLAN

2017-06-27 Thread Jeff Kirsher

From: Tony Nguyen 

This patch adds a check to ensure that adding the MAC filter was
successful before setting the MACVLAN.  If it was unsuccessful, propagate
the error.

Signed-off-by: Tony Nguyen 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 0760bd7eeb01..ca492876bd3d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -681,6 +681,7 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter 
*adapter,
 {
struct list_head *pos;
struct vf_macvlans *entry;
+   s32 retval = 0;
 
if (index <= 1) {
list_for_each(pos, >vf_mvs.l) {
@@ -721,14 +722,15 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter 
*adapter,
if (!entry || !entry->free)
return -ENOSPC;
 
-   entry->free = false;
-   entry->is_macvlan = true;
-   entry->vf = vf;
-   memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
-
-   ixgbe_add_mac_filter(adapter, mac_addr, vf);
+   retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
+   if (retval >= 0) {
+   entry->free = false;
+   entry->is_macvlan = true;
+   entry->vf = vf;
+   memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
+   }
 
-   return 0;
+   return retval;
 }
 
 static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
-- 
2.12.2

[net-next v2 2/6] ixgbe: Enable LASI interrupts for X552 devices

2017-06-27 Thread Jeff Kirsher

From: Tony Nguyen 

Enable LASI interrupts on X552 devices in order to receive notifications of
link configurations of the external PHY and support the configuration of
the internal iXFI link since iXFI does not support auto-negotiation.  This
is not required for X553 devices; add a check to avoid enabling LASI
interrupts for X553 devices.

Signed-off-by: Tony Nguyen 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 31 +++
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 72d84a065e34..aa34e0b131bb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -2404,17 +2404,30 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct 
ixgbe_hw *hw)
status = ixgbe_get_lasi_ext_t_x550em(hw, );
 
/* Enable link status change alarm */
-   status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
- MDIO_MMD_AN, &reg);
-   if (status)
-   return status;
 
-   reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+   /* Enable the LASI interrupts on X552 devices to receive notifications
+* of the link configurations of the external PHY and correspondingly
+* support the configuration of the internal iXFI link, since iXFI does
+* not support auto-negotiation. This is not required for X553 devices
+* having KR support, which performs auto-negotiations and which is used
+* as the internal link to the external PHY. Hence adding a check here
+* to avoid enabling LASI interrupts for X553 devices.
+*/
+   if (hw->mac.type != ixgbe_mac_x550em_a) {
+   status = hw->phy.ops.read_reg(hw,
+   IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+   MDIO_MMD_AN, &reg);
+   if (status)
+   return status;
+
+   reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
 
-   status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-  MDIO_MMD_AN, reg);
-   if (status)
-   return status;
+   status = hw->phy.ops.write_reg(hw,
+   IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+   MDIO_MMD_AN, reg);
+   if (status)
+   return status;
+   }
 
/* Enable high temperature failure and global fault alarms */
status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
-- 
2.12.2

[net-next v2 3/6] ixgbe: Update NW_MNG_IF_SEL support for X553

2017-06-27 Thread Jeff Kirsher

From: Tony Nguyen 

The MAC register NW_MNG_IF_SEL fields have been redefined for
X553. These changes impact the iXFI driver code flow. Since iXFI is
only supported in X552, add MAC checks for iXFI flows.

Signed-off-by: Tony Nguyen 
Signed-off-by: Paul Greenwalt 
Tested-by: Andrew Bowers 
Signed-off-by: Jeff Kirsher 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |  4 ++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 14 +++---
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index f1dbdf26d8e1..4df921f8a48c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -386,7 +386,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
if (ixgbe_removed(reg_addr))
return IXGBE_FAILED_READ_REG;
if (unlikely(hw->phy.nw_mng_if_sel &
-IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M)) {
+IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) {
struct ixgbe_adapter *adapter;
int i;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 9c2460c5ef1b..ffa0ee5cd0f5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3778,8 +3778,8 @@ struct ixgbe_info {
 #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G   BIT(19)
 #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G BIT(20)
 #define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G  BIT(21)
-#define IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M BIT(23)
-#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE   BIT(24)
+#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE   BIT(25)
+#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE   BIT(24) /* X552 only */
 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT 3
 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD   \
(0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index aa34e0b131bb..95adbda36235 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1555,9 +1555,14 @@ static s32 ixgbe_restart_an_internal_phy_x550em(struct 
ixgbe_hw *hw)
  **/
 static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed 
*speed)
 {
+   struct ixgbe_mac_info *mac = &hw->mac;
s32 status;
u32 reg_val;
 
+   /* iXFI is only supported with X552 */
+   if (mac->type != ixgbe_mac_X550EM_x)
+   return IXGBE_ERR_LINK_SETUP;
+
/* Disable AN and force speed to 10G Serial. */
status = ixgbe_read_iosf_sb_reg_x550(hw,
IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
@@ -1874,8 +1879,10 @@ static s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw 
*hw,
else
force_speed = IXGBE_LINK_SPEED_1GB_FULL;
 
-   /* If internal link mode is XFI, then setup XFI internal link. */
-   if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+   /* If X552 and internal link mode is XFI, then setup XFI internal link.
+*/
+   if (hw->mac.type == ixgbe_mac_X550EM_x &&
+   !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
status = ixgbe_setup_ixfi_x550em(hw, &force_speed);
 
if (status)
@@ -2628,7 +2635,8 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct 
ixgbe_hw *hw)
if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
return IXGBE_ERR_CONFIG;
 
-   if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
+   if (!(hw->mac.type == ixgbe_mac_X550EM_x &&
+ !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) {
speed = IXGBE_LINK_SPEED_10GB_FULL |
IXGBE_LINK_SPEED_1GB_FULL;
return ixgbe_setup_kr_speed_x550em(hw, speed);
-- 
2.12.2

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Corentin Labbe

On Tue, Jun 27, 2017 at 04:11:21PM +0800, Chen-Yu Tsai wrote:
> On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
>  wrote:
> > On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
> >> On 31/05/17 08:18, Corentin Labbe wrote:
> >> > The dwmac-sun8i is a heavy hacked version of stmmac hardware by
> >> > allwinner.
> >> > In fact the only common part is the descriptor management and the first
> >> > register function.
> >>
> >> Hi,
> >>
> >> I know I am a bit late with this, but while adapting the U-Boot driver
> >> to the new binding I was wondering about the internal PHY detection:
> >>
> >>
> >> So here you seem to deduce the usage of the internal PHY by the PHY
> >> interface specified in the DT (MII = internal, RGMII = external).
> >> I think I raised this question before, but isn't it perfectly legal for
> >> a board to use MII with an external PHY even on those SoCs that feature
> >> an internal PHY?
> >> On the first glance that does not make too much sense, but apart from
> >> not being the correct binding to describe all of the SoCs features I see
> >> two scenarios:
> >> 1) A board vendor might choose to not use the internal PHY because it
> >> has bugs, lacks features (configurability) or has other issues. For
> >> instance I have heard reports that the internal PHY makes the SoC go
> >> rather hot, possibly limiting the CPU frequency. By using an external
> >> MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
> >> 2) A PHY does not necessarily need to be directly connected to
> >> magnetics. Indeed quite some boards use (RG)MII to connect to a switch
> >> IC or some other network circuitry, for instance fibre connectors.
> >>
> >> So I was wondering if we would need an explicit:
> >>   allwinner,use-internal-phy;
> >> boolean DT property to signal the usage of the internal PHY?
> >> Alternatively we could go with the negative version:
> >>   allwinner,disable-internal-phy;
> >>
> >> Or what about introducing a new "allwinner,internal-mii-phy" compatible
> >> string for the *PHY* node and use that?
> >>
> >> I just want to avoid that we introduce a binding that causes us
> >> headaches later. I think we can still fix this with a followup patch
> >> before the driver and its binding hit a release kernel.
> >>
> >> Cheers,
> >> Andre.
> >>
> >
> > I just see some patch, where "phy-mode = internal" is valid.
> > I will try to find a way to use it
> 
> Can you provide a link?

https://lkml.org/lkml/2017/6/23/479

> 
> I'm not a fan of using phy-mode for this. There's no guarantee what
> mode the internal PHY uses. That's what phy-mode is for.

For each SoC the internal PHY mode is known and set in
emac_variant/internal_phy,
so it's not a problem.

Patch coming soon.

RE: [RFC 02/19] qed: Implement iWARP initialization, teardown and qp operations

2017-06-27 Thread Kalderon, Michal

From: Leon Romanovsky [mailto:l...@kernel.org]
Sent: Tuesday, June 27, 2017 8:46 AM
> 
> On Mon, Jun 26, 2017 at 09:06:52PM +0300, Michal Kalderon wrote:
> > This patch adds iWARP support for flows that have common code between
> > RoCE and iWARP, such as initialization, teardown and qp setup verbs:
> > create, destroy, modify, query.
> > It introduces the iWARP specific files qed_iwarp.[ch] and
> > iwarp_common.h
> >
> > Signed-off-by: Michal Kalderon 
> > Signed-off-by: Yuval Mintz 
> > Signed-off-by: Ariel Elior 
> >
> > ---
> 
> <...>
> 
> > +#define QED_IWARP_PARAM_P2P(1)
> 
> <...>
> 
> > +
> > +   iwarp_info->peer2peer = QED_IWARP_PARAM_P2P;
> 
> Can you shed a light what is it?
> Thanks
It's a mode in MPA rev2. It's currently hard-coded, and we plan on making this
configurable in the future; we're looking into using the rdmatool.

RE: [RFC 15/19] RDMA/qedr: Add iWARP support in existing verbs.

2017-06-27 Thread Kalderon, Michal

From: Leon Romanovsky [mailto:l...@kernel.org]
Sent: Tuesday, June 27, 2017 8:27 AM
 
> On Mon, Jun 26, 2017 at 09:07:05PM +0300, Michal Kalderon wrote:
> > Make slight modifications to common RoCE/iWARP code.
> > Add additional doorbell for iWARP post_send.
> > iWARP QP pbl is allocated in qed and not in qedr.
> >
> > Signed-off-by: Michal Kalderon 
> > Signed-off-by: Ram Amrani 
> > Signed-off-by: Ariel Elior 
> >
> > ---
> >  drivers/infiniband/hw/qedr/qedr.h  |   3 +
> >  drivers/infiniband/hw/qedr/verbs.c | 171
> +
> >  2 files changed, 139 insertions(+), 35 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/qedr/qedr.h
> b/drivers/infiniband/hw/qedr/qedr.h
> > index c52fde0..0c0a39a 100644
> > --- a/drivers/infiniband/hw/qedr/qedr.h
> > +++ b/drivers/infiniband/hw/qedr/qedr.h
> > @@ -319,6 +319,9 @@ struct qedr_qp_hwq_info {
> > /* DB */
> > void __iomem *db;
> > union db_prod32 db_data;
> > +
> > +   void __iomem *iwarp_db2;
> > +   union db_prod32 iwarp_db2_data;
> 
> Why do you need two doorbells?

This is a hw requirement to handle error state in iWARP 
> >  };

RE: [RFC 01/19] qed: Introduce iWARP personality

2017-06-27 Thread Kalderon, Michal

From: Leon Romanovsky [mailto:l...@kernel.org]
Sent: Tuesday, June 27, 2017 8:37 AM
> 
> On Mon, Jun 26, 2017 at 09:06:51PM +0300, Michal Kalderon wrote:
> > iWARP personality introduced the need for differentiating in several
> > places in the code whether we are RoCE, iWARP or either. This leads to
> > introducing new macros for querying the personality.
> >
> > Signed-off-by: Michal Kalderon 
> > Signed-off-by: Yuval Mintz 
> > Signed-off-by: Ariel Elior 
> >
> > ---
> >  drivers/net/ethernet/qlogic/qed/qed.h  | 26 +++--
> -
> >  drivers/net/ethernet/qlogic/qed/qed_cxt.c  |  8 
> > drivers/net/ethernet/qlogic/qed/qed_dev.c  | 12 +---
> >  drivers/net/ethernet/qlogic/qed/qed_l2.c   |  3 +--
> >  drivers/net/ethernet/qlogic/qed/qed_ll2.c  |  2 +-
> > drivers/net/ethernet/qlogic/qed/qed_main.c | 17 -
> >  include/linux/qed/common_hsi.h |  2 +-
> >  7 files changed, 43 insertions(+), 27 deletions(-)
> 
> I see that these changes are in Ethernet part of your driver, but for RDMA 
> part,
> there are already available inline functions:
> rdma_protocol_iwarp, rdma_protocol_roce.
> 
> Please avoid introducing new IS_IWARP/IS_ROCE macros and the decision
> should be taken on port level and not on device, despite the fact that 
> probably
> your ib_device has only one port.
> 
> Thanks

We still need to differentiate between iWARP/RoCE before
rdma_protocol_iwarp/rdma_protocol_roce become valid
(allocating resources, registering the ib device, initializing ib_device and
its capabilities).
For the rest of the places, after the port capabilities are valid, we'll use
the inline functions.

Re: [PATCH v6 05/21] net-next: stmmac: Add dwmac-sun8i

2017-06-27 Thread Chen-Yu Tsai

On Tue, Jun 27, 2017 at 4:05 PM, Corentin Labbe
 wrote:
> On Mon, Jun 26, 2017 at 01:18:23AM +0100, André Przywara wrote:
>> On 31/05/17 08:18, Corentin Labbe wrote:
>> > The dwmac-sun8i is a heavy hacked version of stmmac hardware by
>> > allwinner.
>> > In fact the only common part is the descriptor management and the first
>> > register function.
>>
>> Hi,
>>
>> I know I am a bit late with this, but while adapting the U-Boot driver
>> to the new binding I was wondering about the internal PHY detection:
>>
>>
>> So here you seem to deduce the usage of the internal PHY by the PHY
>> interface specified in the DT (MII = internal, RGMII = external).
>> I think I raised this question before, but isn't it perfectly legal for
>> a board to use MII with an external PHY even on those SoCs that feature
>> an internal PHY?
>> On the first glance that does not make too much sense, but apart from
>> not being the correct binding to describe all of the SoCs features I see
>> two scenarios:
>> 1) A board vendor might choose to not use the internal PHY because it
>> has bugs, lacks features (configurability) or has other issues. For
>> instance I have heard reports that the internal PHY makes the SoC go
>> rather hot, possibly limiting the CPU frequency. By using an external
>> MII PHY (which are still cheaper than RGMII PHYs) this can be avoided.
>> 2) A PHY does not necessarily need to be directly connected to
>> magnetics. Indeed quite some boards use (RG)MII to connect to a switch
>> IC or some other network circuitry, for instance fibre connectors.
>>
>> So I was wondering if we would need an explicit:
>>   allwinner,use-internal-phy;
>> boolean DT property to signal the usage of the internal PHY?
>> Alternatively we could go with the negative version:
>>   allwinner,disable-internal-phy;
>>
>> Or what about introducing a new "allwinner,internal-mii-phy" compatible
>> string for the *PHY* node and use that?
>>
>> I just want to avoid that we introduce a binding that causes us
>> headaches later. I think we can still fix this with a followup patch
>> before the driver and its binding hit a release kernel.
>>
>> Cheers,
>> Andre.
>>
>
> I just see some patch, where "phy-mode = internal" is valid.
> I will try to find a way to use it

Can you provide a link?

I'm not a fan of using phy-mode for this. There's no guarantee what
mode the internal PHY uses. That's what phy-mode is for.

In any case, we should fix this before 4.13 is released.

ChenYu

[PATCH net-next 11/14] nfp: handle SR-IOV already enabled when driver is probing

2017-06-27 Thread Jakub Kicinski

We assumed that when we probe, the number of enabled VFs will be 0.
This doesn't have to be the case, for example if a previous driver left
SR-IOV enabled due to some VFs being assigned.  Read the number of VFs
enabled.  Fail probe if it's above the current FW's limit.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_main.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c 
b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 748e54cc885e..d47adb4c86d6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -414,6 +414,14 @@ static int nfp_pci_probe(struct pci_dev *pdev,
if (err)
goto err_fw_unload;
 
+   pf->num_vfs = pci_num_vf(pdev);
+   if (pf->num_vfs > pf->limit_vfs) {
+   dev_err(&pdev->dev,
+   "Error: %d VFs already enabled, but loaded FW can only 
support %d\n",
+   pf->num_vfs, pf->limit_vfs);
+   goto err_fw_unload;
+   }
+
err = nfp_net_pci_probe(pf);
if (err)
goto err_sriov_unlimit;
-- 
2.11.0

[PATCH net-next 10/14] nfp: wire get_phys_port_name on representors

2017-06-27 Thread Jakub Kicinski

Make nfp_port_get_phys_port_name() support new port types and
wire it up to representors' struct net_device_ops.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 drivers/net/ethernet/netronome/nfp/nfp_net_repr.c |  1 +
 drivers/net/ethernet/netronome/nfp/nfp_port.c | 30 +--
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c 
b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 7bfdef2af1a9..046b89eb4cf2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -256,6 +256,7 @@ const struct net_device_ops nfp_repr_netdev_ops = {
.ndo_get_stats64= nfp_repr_get_stats64,
.ndo_has_offload_stats  = nfp_repr_has_offload_stats,
.ndo_get_offload_stats  = nfp_repr_get_offload_stats,
+   .ndo_get_phys_port_name = nfp_port_get_phys_port_name,
 };
 
 static void nfp_repr_clean(struct nfp_repr *repr)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c 
b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 0be6c7e0b1c1..0b44952945d8 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -106,15 +106,31 @@ nfp_port_get_phys_port_name(struct net_device *netdev, 
char *name, size_t len)
int n;
 
port = nfp_port_from_netdev(netdev);
-   eth_port = __nfp_port_get_eth_port(port);
-   if (!eth_port)
+   if (!port)
+   return -EOPNOTSUPP;
+
+   switch (port->type) {
+   case NFP_PORT_PHYS_PORT:
+   eth_port = __nfp_port_get_eth_port(port);
+   if (!eth_port)
+   return -EOPNOTSUPP;
+
+   if (!eth_port->is_split)
+   n = snprintf(name, len, "p%d", eth_port->label_port);
+   else
+   n = snprintf(name, len, "p%ds%d", eth_port->label_port,
+eth_port->label_subport);
+   break;
+   case NFP_PORT_PF_PORT:
+   n = snprintf(name, len, "pf%d", port->pf_id);
+   break;
+   case NFP_PORT_VF_PORT:
+   n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id);
+   break;
+   default:
return -EOPNOTSUPP;
+   }
 
-   if (!eth_port->is_split)
-   n = snprintf(name, len, "p%d", eth_port->label_port);
-   else
-   n = snprintf(name, len, "p%ds%d", eth_port->label_port,
-eth_port->label_subport);
if (n >= len)
return -EINVAL;
 
-- 
2.11.0

< 1 2 3 >

101 - 200 of 217 matches

Mail list logo