[PATCH net-next 1/3] ipv4: Lock-less per-packet multipath

2015-06-17 Thread Peter Nørlund
The current multipath attempted to be quasi random, but in most cases it
behaved just like a round robin balancing. This patch refactors the
algorithm to be exactly that and in doing so, avoids the spin lock.

The new design paves the way for hash-based multipath, replacing the
modulo with thresholds, minimizing disruption in case of failing paths or
route replacements.

Signed-off-by: Peter Nørlund p...@ordbogen.com
---
 include/net/ip_fib.h |   6 +--
 net/ipv4/Kconfig |   1 +
 net/ipv4/fib_semantics.c | 116 ++-
 3 files changed, 68 insertions(+), 55 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 54271ed..4be4f25 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -76,8 +76,8 @@ struct fib_nh {
unsigned intnh_flags;
unsigned char   nh_scope;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-   int nh_weight;
-   int nh_power;
+   int nh_mp_weight;
+   atomic_tnh_mp_upper_bound;
 #endif
 #ifdef CONFIG_IP_ROUTE_CLASSID
__u32   nh_tclassid;
@@ -115,7 +115,7 @@ struct fib_info {
 #define fib_advmss fib_metrics[RTAX_ADVMSS-1]
int fib_nhs;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-   int fib_power;
+   int fib_mp_weight;
 #endif
struct rcu_head rcu;
struct fib_nh   fib_nh[0];
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index d83071d..cb91f67 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -81,6 +81,7 @@ config IP_MULTIPLE_TABLES
 config IP_ROUTE_MULTIPATH
bool IP: equal cost multipath
depends on IP_ADVANCED_ROUTER
+   select BITREVERSE
help
  Normally, the routing tables specify a single action to be taken in
  a deterministic manner for a given packet. If you say Y here
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 28ec3c1..8c8df80 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -15,6 +15,7 @@
 
 #include asm/uaccess.h
 #include linux/bitops.h
+#include linux/bitrev.h
 #include linux/types.h
 #include linux/kernel.h
 #include linux/jiffies.h
@@ -57,7 +58,7 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
-static DEFINE_SPINLOCK(fib_multipath_lock);
+static DEFINE_PER_CPU(u8, fib_mp_rr_counter);
 
 #define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
@@ -261,7 +262,7 @@ static inline int nh_comp(const struct fib_info *fi, const 
struct fib_info *ofi)
nh-nh_gw  != onh-nh_gw ||
nh-nh_scope != onh-nh_scope ||
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-   nh-nh_weight != onh-nh_weight ||
+   nh-nh_mp_weight != onh-nh_mp_weight ||
 #endif
 #ifdef CONFIG_IP_ROUTE_CLASSID
nh-nh_tclassid != onh-nh_tclassid ||
@@ -449,6 +450,43 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int 
remaining)
return remaining  0 ? 0 : nhs;
 }
 
+static void fib_rebalance(struct fib_info *fi)
+{
+   int factor;
+   int total;
+   int w;
+
+   if (fi-fib_nhs  2)
+   return;
+
+   total = 0;
+   for_nexthops(fi) {
+   if (!(nh-nh_flags  RTNH_F_DEAD))
+   total += nh-nh_mp_weight;
+   } endfor_nexthops(fi);
+
+   if (likely(total != 0)) {
+   factor = DIV_ROUND_UP(total, 8388608);
+   total /= factor;
+   } else {
+   factor = 1;
+   }
+
+   w = 0;
+   change_nexthops(fi) {
+   int upper_bound;
+
+   if (nexthop_nh-nh_flags  RTNH_F_DEAD) {
+   upper_bound = -1;
+   } else {
+   w += nexthop_nh-nh_mp_weight / factor;
+   upper_bound = DIV_ROUND_CLOSEST(256 * w, total);
+   }
+
+   atomic_set(nexthop_nh-nh_mp_upper_bound, upper_bound);
+   } endfor_nexthops(fi);
+}
+
 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
   int remaining, struct fib_config *cfg)
 {
@@ -461,7 +499,7 @@ static int fib_get_nhs(struct fib_info *fi, struct 
rtnexthop *rtnh,
nexthop_nh-nh_flags =
(cfg-fc_flags  ~0xFF) | rtnh-rtnh_flags;
nexthop_nh-nh_oif = rtnh-rtnh_ifindex;
-   nexthop_nh-nh_weight = rtnh-rtnh_hops + 1;
+   nexthop_nh-nh_mp_weight = rtnh-rtnh_hops + 1;
 
attrlen = rtnh_attrlen(rtnh);
if (attrlen  0) {
@@ -884,7 +922,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi-fib_net-ipv4.fib_num_tclassid_users++;
 #endif
 #ifdef 

[PATCH net-next 0/3] ipv4: Hash-based multipath routing

2015-06-17 Thread Peter Nørlund
This patch series enhances the IPv4 multipath code, adding support for
hash-based multipath.

The multipath algorithm is a per-route attribute (RTA_MP_ALGO) with some
degree of binary compatibility with the old implementation (2.6.12 - 2.6.22),
but without source level compatibility since attributes have different names:

RT_MP_ALG_L3_HASH:
L3 hash-based distribution. This was IP_MP_ALG_NONE, which with the route
cache behaved somewhat like L3-based distribution. This is now the default.

RT_MP_ALG_PER_PACKET:
Per-packet distribution. Was IP_MP_ALG_RR. Uses round-robin.

RT_MP_ALG_DRR, RT_MP_ALG_RANDOM, RT_MP_ALG_WRANDOM:
Unsupported values, but reserved because they existed in 2.6.12 - 2.6.22.

RT_MP_ALG_L4_HASH:
L4 hash-based distribution. This is new.

The traditional modulo approach was replaced by a threshold-based approach,
described in RFC 2992. This reduces disruption in case of link failures or
route changes.

To better support anycast environments where PMTU usually breaks with
multipath, certain ICMP packets are hashed using the header within the
payload, ensuring that ICMP packets are routed over the same path as the
flow they belong to.

As a side effect, the multipath spinlock was removed and the code got faster.
I measured ip_mkroute_input (excl. __mkroute_input) on a Xeon X3350 (2.66GHz)
with two paths and L3 hashing:

1 thread:
Before: ~199.8 cycles(tsc)
After:   ~75.2 cycles(tsc)

4 threads:
Before: ~393.9 cycles(tsc)
After:   ~77.8 cycles(tsc)

If this patch is accepted, a follow-up patch to iproute2 will also be
submitted.

Best regards,
 Peter Nørlund

Peter Nørlund (3):
  ipv4: Lock-less per-packet multipath
  ipv4: L3 and L4 hash-based multipath routing
  ipv4: ICMP packet inspection for multipath

 include/net/ip_fib.h   |   10 ++-
 include/net/route.h|5 +
 include/uapi/linux/rtnetlink.h |   14 
 net/ipv4/Kconfig   |1 
 net/ipv4/fib_frontend.c|4 +
 net/ipv4/fib_semantics.c   |  146 +---
 net/ipv4/icmp.c|   29 +++-
 net/ipv4/route.c   |  108 +++---
 net/ipv4/xfrm4_policy.c|2 -
 9 files changed, 246 insertions(+), 73 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next 3/3] ipv4: ICMP packet inspection for multipath

2015-06-17 Thread Peter Nørlund
ICMP packets are inspected to let them route together with the flow they
belong to, allowing anycast environments to work with ECMP.

Signed-off-by: Peter Nørlund p...@ordbogen.com
---
 net/ipv4/icmp.c  | 27 ++-
 net/ipv4/route.c | 80 ++--
 2 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3abcfea..20f1d5e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -447,6 +447,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 {
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
+   struct flowi4 mp_flow;
int err;
 
memset(fl4, 0, sizeof(*fl4));
@@ -459,7 +460,31 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4-fl4_icmp_type = type;
fl4-fl4_icmp_code = code;
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
-   rt = __ip_route_output_key(net, fl4, NULL);
+
+   /* Source and destination is swapped. See ip_multipath_flow */
+   mp_flow.saddr = iph-daddr;
+   mp_flow.daddr = iph-saddr;
+   mp_flow.flowi4_proto = iph-protocol;
+   mp_flow.fl4_sport = 0;
+   mp_flow.fl4_dport = 0;
+   if (!ip_is_fragment(iph)) {
+   if (iph-protocol == IPPROTO_TCP ||
+   iph-protocol == IPPROTO_UDP ||
+   iph-protocol == IPPROTO_SCTP) {
+   __be16 _ports[2];
+   const __be16 *ports;
+
+   ports = skb_header_pointer(skb_in, iph-ihl * 4,
+  sizeof(_ports),
+  _ports);
+   if (ports) {
+   mp_flow.fl4_sport = ports[1];
+   mp_flow.fl4_dport = ports[0];
+   }
+   }
+   }
+
+   rt = __ip_route_output_key(net, fl4, mp_flow);
if (IS_ERR(rt))
return rt;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a1ec62c..bab4318 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1635,31 +1635,83 @@ out:
 /* Fill flow key data based on packet for use in multipath routing. */
 static void ip_multipath_flow(const struct sk_buff *skb, struct flowi4 *flow)
 {
-   const struct iphdr *iph;
-
-   iph = ip_hdr(skb);
-
-   flow-saddr = iph-saddr;
-   flow-daddr = iph-daddr;
-   flow-flowi4_proto = iph-protocol;
+   struct icmphdr _icmph;
+   struct iphdr _inner_iph;
+   const struct iphdr *outer_iph;
+   const struct icmphdr *icmph;
+   const struct iphdr *inner_iph;
+   unsigned int offset;
+   __be16 _ports[2];
+   const __be16 *ports;
+
+   outer_iph = ip_hdr(skb);
+
+   flow-saddr = outer_iph-saddr;
+   flow-daddr = outer_iph-daddr;
+   flow-flowi4_proto = outer_iph-protocol;
flow-fl4_sport = 0;
flow-fl4_dport = 0;
 
-   if (unlikely(ip_is_fragment(iph)))
+   if (unlikely(ip_is_fragment(outer_iph)))
return;
 
-   if (iph-protocol == IPPROTO_TCP ||
-   iph-protocol == IPPROTO_UDP ||
-   iph-protocol == IPPROTO_SCTP) {
-   __be16 _ports;
-   const __be16 *ports;
+   offset = outer_iph-ihl * 4;
 
-   ports = skb_header_pointer(skb, iph-ihl * 4, sizeof(_ports),
+   if (outer_iph-protocol == IPPROTO_TCP ||
+   outer_iph-protocol == IPPROTO_UDP ||
+   outer_iph-protocol == IPPROTO_SCTP) {
+   ports = skb_header_pointer(skb, offset, sizeof(_ports),
   _ports);
if (ports) {
flow-fl4_sport = ports[0];
flow-fl4_dport = ports[1];
}
+
+   return;
+   }
+
+   if (outer_iph-protocol != IPPROTO_ICMP)
+   return;
+
+   icmph = skb_header_pointer(skb, offset, sizeof(_icmph), _icmph);
+   if (!icmph)
+   return;
+
+   if (icmph-type != ICMP_DEST_UNREACH 
+   icmph-type != ICMP_SOURCE_QUENCH 
+   icmph-type != ICMP_REDIRECT 
+   icmph-type != ICMP_TIME_EXCEEDED 
+   icmph-type != ICMP_PARAMETERPROB) {
+   return;
+   }
+
+   offset += sizeof(_icmph);
+   inner_iph = skb_header_pointer(skb, offset, sizeof(_inner_iph),
+  _inner_iph);
+   if (inner_iph)
+   return;
+
+   /* Since the ICMP payload contains a packet sent from the current
+* recipient, we swap source and destination addresses and ports
+*/
+   flow-saddr = inner_iph-daddr;
+   flow-daddr = inner_iph-saddr;
+   flow-flowi4_proto = inner_iph-protocol;
+
+   if (unlikely(ip_is_fragment(inner_iph)))
+   return;
+
+   if (inner_iph-protocol != IPPROTO_TCP 
+   inner_iph-protocol != IPPROTO_UDP 
+ 

[PATCH net-next 2/3] ipv4: L3 and L4 hash-based multipath routing

2015-06-17 Thread Peter Nørlund
This patch adds L3 and L4 hash-based multipath routing, selectable on a
per-route basis with the reintroduced RTA_MP_ALGO attribute. The default is
now RT_MP_ALG_L3_HASH.

Signed-off-by: Peter Nørlund p...@ordbogen.com
---
 include/net/ip_fib.h   |  4 ++-
 include/net/route.h|  5 ++--
 include/uapi/linux/rtnetlink.h | 14 ++-
 net/ipv4/fib_frontend.c|  4 +++
 net/ipv4/fib_semantics.c   | 34 ++---
 net/ipv4/icmp.c|  4 +--
 net/ipv4/route.c   | 56 +++---
 net/ipv4/xfrm4_policy.c|  2 +-
 8 files changed, 103 insertions(+), 20 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 4be4f25..250d98e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -37,6 +37,7 @@ struct fib_config {
u32 fc_flags;
u32 fc_priority;
__be32  fc_prefsrc;
+   int fc_mp_alg;
struct nlattr   *fc_mx;
struct rtnexthop*fc_mp;
int fc_mx_len;
@@ -116,6 +117,7 @@ struct fib_info {
int fib_nhs;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_mp_weight;
+   int fib_mp_alg;
 #endif
struct rcu_head rcu;
struct fib_nh   fib_nh[0];
@@ -308,7 +310,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
 int fib_sync_down_dev(struct net_device *dev, int force);
 int fib_sync_down_addr(struct net *net, __be32 local);
 int fib_sync_up(struct net_device *dev);
-void fib_select_multipath(struct fib_result *res);
+void fib_select_multipath(struct fib_result *res, const struct flowi4 *flow);
 
 /* Exported by fib_trie.c */
 void fib_trie_init(void);
diff --git a/include/net/route.h b/include/net/route.h
index fe22d03..1fc7deb 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -110,7 +110,8 @@ struct in_device;
 int ip_rt_init(void);
 void rt_cache_flush(struct net *net);
 void rt_flush_dev(struct net_device *dev);
-struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
+struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp,
+const struct flowi4 *mp_flow);
 struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
struct sock *sk);
 struct dst_entry *ipv4_blackhole_route(struct net *net,
@@ -267,7 +268,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 
*fl4,
  sport, dport, sk);
 
if (!dst || !src) {
-   rt = __ip_route_output_key(net, fl4);
+   rt = __ip_route_output_key(net, fl4, NULL);
if (IS_ERR(rt))
return rt;
ip_rt_put(rt);
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 17fb02f..dff4a72 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -271,6 +271,18 @@ enum rt_scope_t {
 #define RTM_F_EQUALIZE 0x400   /* Multipath equalizer: NI  */
 #define RTM_F_PREFIX   0x800   /* Prefix addresses */
 
+/* Multipath algorithms */
+
+enum rt_mp_alg_t {
+   RT_MP_ALG_L3_HASH,  /* Was IP_MP_ALG_NONE */
+   RT_MP_ALG_PER_PACKET,   /* Was IP_MP_ALG_RR */
+   RT_MP_ALG_DRR,  /* not used */
+   RT_MP_ALG_RANDOM,   /* not used */
+   RT_MP_ALG_WRANDOM,  /* not used */
+   RT_MP_ALG_L4_HASH,
+   __RT_MP_ALG_MAX
+};
+
 /* Reserved table identifiers */
 
 enum rt_class_t {
@@ -301,7 +313,7 @@ enum rtattr_type_t {
RTA_FLOW,
RTA_CACHEINFO,
RTA_SESSION, /* no longer used */
-   RTA_MP_ALGO, /* no longer used */
+   RTA_MP_ALGO,
RTA_TABLE,
RTA_MARK,
RTA_MFC_STATS,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e..376e8c1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -590,6 +590,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_PREFSRC]   = { .type = NLA_U32 },
[RTA_METRICS]   = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
+   [RTA_MP_ALGO]   = { .type = NLA_U32 },
[RTA_FLOW]  = { .type = NLA_U32 },
 };
 
@@ -650,6 +651,9 @@ static int rtm_to_fib_config(struct net *net, struct 
sk_buff *skb,
cfg-fc_mp = nla_data(attr);
cfg-fc_mp_len = nla_len(attr);
break;
+   case RTA_MP_ALGO:
+   cfg-fc_mp_alg = nla_get_u32(attr);
+   break;
case RTA_FLOW:
cfg-fc_flow = nla_get_u32(attr);
break;
diff --git a/net/ipv4/fib_semantics.c 

Re: [PATCH net-next 00/15] Simplify netfilter and network namespaces

2015-06-17 Thread Julian Anastasov

Hello,

On Sun, 14 Jun 2015, Eric W. Biederman wrote:

 This patchset roots out all of the very weird network namespace
 computation logic (except for the code in ipvs) and fixes it.  I really
 don't like how the code has been essentially guessing  which network
 namespace to use.
 
 Probably the worst guessing is in ipvs in the function skb_net. I have
 some preliminary changes to fix ipvs but they are not quite ready yet.
 Cleaning up ipvs enough that I can kill skb_net is on my short list.

For IPVS skb_net is too complicated. One of
the first things we do in hook handler is to check
skb_dst, so even now dev_net(skb_dst(skb)->dev) should
work. sock_net is used for administration via netlink,
skb_net is not used there (ip_vs_ctl.c).

As for removing/replacing skb_net, ip_vs_conn_net()
is ok when cp is present, we have also svc->net, otherwise
we can store net into struct ip_vs_iphdr, it is filled by
ip_vs_fill_iph_skb from small number of places when hook
handler is entered.

Regards

--
Julian Anastasov j...@ssi.bg
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


DSA: Exposing CPU port [Was: Re: [PATCH 3/3] net: dsa: Allow configuration of CPU DSA port speeds/duplex]

2015-06-17 Thread Florian Fainelli
2015-06-17 11:09 GMT-07:00 Vivien Didelot vivien.dide...@savoirfairelinux.com:
 Hi Andrew, All,

 On 12/06/15 10:18, Andrew Lunn wrote:
 By default, DSA and CPU ports are configured to the maximum speed the
 switch supports. However there can be use cases where the peer device
 port is slower. Allow a fixed-link property to be used with the DSA
 and CPU port in the device tree, and use this information to configure
 the port.

 Would it be a good idea for DSA to expose the cpu port to userspace as well?
 That way, it'd be possible to use ethtool to set the port speed and duplex
 mode, or dump registers (this would have saved me quite some time in dev).

My problem with that approach would be that we would expose a cpu
net_device in a way that it is not usable beyond statistics and
control knobs. In terms of data-path, you would not really want to
have it usable (sending data from the CPU to other ports, that's
already what other net_devices do), as it would be a duplicate
interface with respect to how the master net_device in DSA (aka
unmodified Ethernet driver) works. Having e.g: eth0 send DSA-tagged
packets today is already very confusing to users (they do not
necessarily understand why this interface does or how it works), so
having a cpu interface would cause more trouble here.


 Also, in my RFC for 802.1Q support [1], I assume the CPU port to be a tagged
 member of each VLAN. But someone may want to add a VLAN with swp3 and swp4
 only, and another VLAN with swp0, swp1 and the CPU port. Am I correct? This is
 currently not possible, but with an exposed cpu interface, the user could
 explicitly add the CPU port to a VLAN.

If we do put, say swp0 and swp1 in VLAN1, and CPU port is not in this
VLAN1, we cannot learn any traffic from it, this might be an
acceptable use-case, but I am not sure if there is much we get from
not adding the CPU to this VLAN membership, am I missing something?
-- 
Florian
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: macb napi strange behavior

2015-06-17 Thread Jaeden Amero
On 06/17/2015 11:09 AM, Nicolae Rosia wrote:
 I'm trying to determine why I have a huge number of IRQs for only a
 macb interface and the other one works just fine (low IRQ activity). I
 have activated IP forward and I'm just forwarding packets from eth0 to
 eth1.
 The platform is Zynq7, Linux kernel 4.0, vanilla macb.
 
 cat /proc/interrupts:
 [...]
 144: 679425 0   GIC  54  eth0
 145:   17867097  0   GIC  77  eth1
 [...]
 
 Any ideas?

The times we've seen tons of interrupts on Ethernet with interrupts
routed through the PL was when the FPGA was unprogrammed (or in the
process of being reprogrammed), or was configured with the interrupt
line tied to asserted.

In the latter case, Linux would eventually stop handling any more
interrupts for that port due to the interrupt storm.

In the former case, there isn't much one can do except make sure that
any FPGA-routed interrupts are unregistered and disabled before FPGA
reprogramming and then to re-enable those interrupts after reprogramming.

It'd be nice to have some sort of notification to drivers, given when
the FPGA state changes, when hardware the drivers are responsible for
disappears or gets disconnected. This is an area of research for us at NI.

Cheers,
Jaeden
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 0/3 v4] changes to make ipv4 routing table aware of next-hop link status

2015-06-17 Thread Andy Gospodarek
On Wed, Jun 17, 2015 at 06:05:32PM +0200, Nicolas Dichtel wrote:
 Le 15/06/2015 18:33, Andy Gospodarek a écrit :
 This series adds the ability to have the Linux kernel track whether or
 not a particular route should be used based on the link-status of the
 interface associated with the next-hop.
 
 Before this patch any link-failure on an interface that was serving as a
 gateway for some systems could result in those systems being isolated
 from the rest of the network as the stack would continue to attempt to
 send frames out of an interface that is actually linked-down.  When the
 kernel is responsible for all forwarding, it should also be responsible
 for taking action when the traffic can no longer be forwarded -- there
 is no real need to outsource link-monitoring to userspace anymore.
 
 This feature is only enabled with the new per-interface or ipv4 global
 sysctls called 'ignore_routes_with_linkdown'.
 
 net.ipv4.conf.all.ignore_routes_with_linkdown = 0
 net.ipv4.conf.default.ignore_routes_with_linkdown = 0
 net.ipv4.conf.lo.ignore_routes_with_linkdown = 0
 ...
 Sorry for my late reply, but is it possible to advertise this sysctl via the
 netconf infra (grep NETCONFA_)?
I would be happy to do that.  Do you see it as a requirement for this
set to be accepted?

I've got to add ipv6 support, so netconf support could also easily be
added in the kernel and iproute2.

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net v2] packet: avoid out of bounds read in round robin fanout

2015-06-17 Thread Willem de Bruijn
From: Willem de Bruijn will...@google.com

PACKET_FANOUT_LB computes f->rr_cur such that it is modulo
f->num_members. It returns the old value unconditionally, but
f->num_members may have changed since the last store. Ensure
that the return value is always < num.

When modifying the logic, simplify it further by replacing the loop
with an unconditional atomic increment.

Fixes: dc99f600698d ("packet: Add fanout support.")
Suggested-by: Eric Dumazet eduma...@google.com
Signed-off-by: Willem de Bruijn will...@google.com
---
 net/packet/af_packet.c | 18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5989c6..104f902 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1272,16 +1272,6 @@ static void packet_sock_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
 }
 
-static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
-{
-   int x = atomic_read(f-rr_cur) + 1;
-
-   if (x = num)
-   x = 0;
-
-   return x;
-}
-
 static unsigned int fanout_demux_hash(struct packet_fanout *f,
  struct sk_buff *skb,
  unsigned int num)
@@ -1293,13 +1283,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout 
*f,
struct sk_buff *skb,
unsigned int num)
 {
-   int cur, old;
+   unsigned int val = atomic_inc_return(f-rr_cur);
 
-   cur = atomic_read(f-rr_cur);
-   while ((old = atomic_cmpxchg(f-rr_cur, cur,
-fanout_rr_next(f, num))) != cur)
-   cur = old;
-   return cur;
+   return val % num;
 }
 
 static unsigned int fanout_demux_cpu(struct packet_fanout *f,
-- 
2.4.3.573.g4eafbef

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 0/2] sctp: add new getsockopt option SCTP_SOCKOPT_PEELOFF_KERNEL

2015-06-17 Thread Neil Horman
On Wed, Jun 17, 2015 at 10:40:26AM -0300, Marcelo Ricardo Leitner wrote:
 On 17-06-2015 10:16, Neil Horman wrote:
 On Wed, Jun 17, 2015 at 09:40:32AM -0300, Marcelo Ricardo Leitner wrote:
 On 17-06-2015 09:20, Neil Horman wrote:
 On Wed, Jun 17, 2015 at 08:38:10AM -0300, Marcelo Ricardo Leitner wrote:
 On 17-06-2015 07:21, Neil Horman wrote:
 On Tue, Jun 16, 2015 at 07:42:31PM -0300, Marcelo Ricardo Leitner wrote:
 Hi,
 
 I'm trying to remove a direct dependency of dlm module on sctp one.
 Currently dlm code is calling sctp_do_peeloff() directly and only this
 call is causing the load of sctp module together with dlm. For that, we
 have basically 3 options:
 - Doing a module split on dlm
- which I'm avoiding because it was already split and was merged 
  (more
  info on patch2 changelog)
- and the sctp code on it is rather small if compared with sctp 
  module
  itself
 - Using some other infra that gets indirectly activated, like 
 getsockopt()
- It was like this before, but the exposed sockopt created a file
  descriptor for the new socket and that created some serious issues.
  More info on 2f2d76cc3e93 (dlm: Do not allocate a fd for peeloff)
 - Doing something like ipv6_stub (which is used by vxlan) or similar
- but I don't feel that's a good way out here, it doesn't feel right.
 
 So I'm approaching this by going with 2nd option again but this time
 also creating a new sockopt that is only accessible for kernel users of
 this protocol, so that we are safe to directly return a struct socket *
 via getsockopt() results. This is the tricky part of this series.
 
 It smells hacky yes but currently most of sctp calls are wrapped behind
 kernel_*(). Even if we set a flag (like netlink does) saying that this
 is a kernel socket, we still have the issue of getting the function call
 through and returning such non-usual return value.
 
 I kept __user marker on sctp_getsockopt_peeloff_kernel() prototype and
 its helpers just to avoid issues with static checkers.
 
 Kernel path not really tested yet.. mainly willing to know what do you
 think, is this feasible? getsockopt option only reachable by kernel
 itself? Couldn't find any other like this.
 
 Thanks,
 Marcelo
 
 Marcelo Ricardo Leitner (2):
sctp: add new getsockopt option SCTP_SOCKOPT_PEELOFF_KERNEL
dlm: avoid using sctp_do_peeloff directly
 
   fs/dlm/lowcomms.c | 17 -
   include/uapi/linux/sctp.h | 12 
   net/sctp/socket.c | 39 +++
   3 files changed, 59 insertions(+), 9 deletions(-)
 
 --
 2.4.1
 
 
 
 Why not just use the existing PEELOFF socket option with the 
 kernel_getsockopt
 interface, and sockfd_lookup to translate the returned value back to a 
 socket
 struct?  That seems less redundant and less hack-ish to me.
 
 It was like that before commit 2f2d76cc3e93 (dlm: Do not allocate a fd 
 for
 peeloff), but it caused serious issues due to the fd allocation, so 
 that's
 what I'm willing to avoid now.
 
 References:
 http://article.gmane.org/gmane.linux.network.drbd/22529
 https://bugzilla.redhat.com/show_bug.cgi?id=1075629 (this one is closed,
 sorry)
 
Marcelo
 
 Ah, I see.  You're using the new socket option as a differentiator to just 
 skip
 the creation of an FD.
 
 Exactly.
 
 I get your reasoning, but I'm still not in love with the idea of 
 duplicating
 code paths to avoid that action.  Can we use some data inside the socket
 structure to do this differentiation?  Specifically here I'm thinking of
 sock->file.  IIRC that will be non-null for any sockets created in user 
 space,
 
 I had thought about using some socket flags like netlink does but couldn't
 get around with that. Hadn't thought about sock->file though, nice idea.
 
 but will always be NULL for dlm created sockets (since we use sock_create
 directly to create them).  If that is a sufficient differentiator, then we 
 can
 just optionally allocate the new socket fd for the peeled off socket, iff 
 the
 parent sock->file pointer is non-null.
 
 Thoughts?
 Neil
 
 We can re-use the current code path, by either checking it via sock->file or
 via get_fs(). That will require us to change the option arg format so we
 keep it nice and clean but as it would be kernel-side only, it should be ok
 right? It currently is:
 
 typedef struct {
  sctp_assoc_t associd;
  int sd;
 } sctp_peeloff_arg_t;
 
 And we would have to fit a pointer in there, something like:
 typedef union {
 struct {
 sctp_assoc_t associd;
 int sd;
 };
 void *sock;
 } sctp_peeloff_arg_t;
 
 Sounds good?
 
 Yes, sounds reasonable.
 
 Thanks!
 Neil
 
 Cool, thanks Neil. I'll rework these now but will post the new version
 probably by next week only, as we can get dlm properly tested too.
 
Worksforme :)
Neil

 Cheers,
 Marcelo
 
 
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to 

Re: [PATCH] xilinx:Remove unused variable num_frag in the function axienet_start_xmit

2015-06-17 Thread Sören Brinkmann
On Wed, 2015-06-17 at 12:21PM -0400, Nicholas Krause wrote:
 
 
 On June 17, 2015 12:07:30 PM EDT, Sören Brinkmann 
 soren.brinkm...@xilinx.com wrote:
 On Wed, 2015-06-17 at 11:52AM -0400, Nicholas Krause wrote:
  This removes the unused variable num_frag and the setting of it
  to the number of fragments from the passed sk_buff pointer by
  this function's caller due to this variable never being used
  in this particular function and is not declared as  global so
  setting it for global use in this file is pointless.
  
  Signed-off-by: Nicholas Krause xerofo...@gmail.com
  ---
   drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 --
   1 file changed, 2 deletions(-)
  
  diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
 b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
  index 28b7e7d..624dce4 100644
  --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
  +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
  @@ -651,7 +651,6 @@ static inline int
 axienet_check_tx_bd_space(struct axienet_local *lp,
   static int axienet_start_xmit(struct sk_buff *skb, struct net_device
 *ndev)
   {
 u32 ii;
  -  u32 num_frag;
 u32 csum_start_off;
 u32 csum_index_off;
 skb_frag_t *frag;
  @@ -659,7 +658,6 @@ static int axienet_start_xmit(struct sk_buff
 *skb, struct net_device *ndev)
 struct axienet_local *lp = netdev_priv(ndev);
 struct axidma_bd *cur_p;
   
  -  num_frag = skb_shinfo(skb)-nr_frags;
 cur_p = lp-tx_bd_v[lp-tx_bd_tail];
   
 if (axienet_check_tx_bd_space(lp, num_frag)) {
 
 It's used right here, isn't it (and further down in this function too)?
 Does your tree compile with this change?
 
  Sören
 My tree does compile with this change. 

You have to fix your testing process:
CHK include/config/kernel.release
UPD include/config/kernel.release
CHK include/generated/uapi/linux/version.h
CHK include/generated/utsrelease.h
UPD include/generated/utsrelease.h
CHK include/generated/bounds.h
CHK include/generated/timeconst.h
CHK include/generated/asm-offsets.h
CALLscripts/checksyscalls.sh
  stdin:1253:2: warning: #warning syscall userfaultfd not implemented [-Wcpp]
CC  drivers/net/ethernet/xilinx/xilinx_axienet_main.o
  drivers/net/ethernet/xilinx/xilinx_axienet_main.c: In function 
'axienet_start_xmit':
  drivers/net/ethernet/xilinx/xilinx_axienet_main.c:667:36: error: 'num_frag' 
undeclared (first use in this function)
if (axienet_check_tx_bd_space(lp, num_frag)) {
  ^
  drivers/net/ethernet/xilinx/xilinx_axienet_main.c:667:36: note: each 
undeclared identifier is reported only once for each function it appears in
  scripts/Makefile.build:258: recipe for target 
'drivers/net/ethernet/xilinx/xilinx_axienet_main.o' failed
  make[1]: *** [drivers/net/ethernet/xilinx/xilinx_axienet_main.o] Error 1
  Makefile:1545: recipe for target 
'drivers/net/ethernet/xilinx/xilinx_axienet_main.o' failed
  make: *** [drivers/net/ethernet/xilinx/xilinx_axienet_main.o] Error 2

NACK for the patch.

Sören
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 0/3 v4] changes to make ipv4 routing table aware of next-hop link status

2015-06-17 Thread Nicolas Dichtel

Le 17/06/2015 21:10, Andy Gospodarek a écrit :

On Wed, Jun 17, 2015 at 06:05:32PM +0200, Nicolas Dichtel wrote:

Le 15/06/2015 18:33, Andy Gospodarek a écrit :

This series adds the ability to have the Linux kernel track whether or
not a particular route should be used based on the link-status of the
interface associated with the next-hop.

Before this patch any link-failure on an interface that was serving as a
gateway for some systems could result in those systems being isolated

from the rest of the network as the stack would continue to attempt to

send frames out of an interface that is actually linked-down.  When the
kernel is responsible for all forwarding, it should also be responsible
for taking action when the traffic can no longer be forwarded -- there
is no real need to outsource link-monitoring to userspace anymore.

This feature is only enabled with the new per-interface or ipv4 global
sysctls called 'ignore_routes_with_linkdown'.

net.ipv4.conf.all.ignore_routes_with_linkdown = 0
net.ipv4.conf.default.ignore_routes_with_linkdown = 0
net.ipv4.conf.lo.ignore_routes_with_linkdown = 0
...

Sorry for my late reply, but is it possible to advertise this sysctl via the
netconf infra (grep NETCONFA_)?

I would be happy to do that.  Do you see it as a requirement for this
set to be accepted?

No, but it would be great to do it before net-next closes.



I've got to add ipv6 support, so netconf support could also easily be
added in the kernel and iproute2.

Ok.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/12] fsl/fman: Add the FMan FLIB headers

2015-06-17 Thread Scott Wood
On Wed, 2015-06-17 at 09:59 -0500, Liberman Igal-B31950 wrote:
 
 Regards,
 Igal Liberman.
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Wednesday, June 10, 2015 9:54 PM
  To: Bucur Madalin-Cristian-B32716
  Cc: netdev@vger.kernel.org; linux-ker...@vger.kernel.org; linuxppc-
  d...@lists.ozlabs.org; Liberman Igal-B31950
  Subject: Re: [PATCH 01/12] fsl/fman: Add the FMan FLIB headers
  
  On Wed, 2015-06-10 at 18:21 +0300, Madalin Bucur wrote:
   From: Igal Liberman igal.liber...@freescale.com
   
   This patch presents the FMan Foundation Libraries (FLIB) headers.
   The FMan FLib provides the basic API used by the FMan drivers to
   configure and control the FMan hardware.
   
   Signed-off-by: Igal Liberman igal.liber...@freescale.com
   ---
.../ethernet/freescale/fman/flib/common/general.h  |  41 ++
.../net/ethernet/freescale/fman/flib/fsl_fman.h| 609
   +
2 files changed, 650 insertions(+)
create mode 100644
   drivers/net/ethernet/freescale/fman/flib/common/general.h
create mode 100644
   drivers/net/ethernet/freescale/fman/flib/fsl_fman.h
  
  Why do we need separate patches just for headers?
  
 
 We wanted to make the patches smaller, it's the main reason for this 
 separation.

Patches should be divided by function, not arbitrarily in order to 
decrease size.  Splitting like this makes it harder to see the whole 
picture, to search for identifiers, etc.

The right way to make these patches smaller is to remove unnecessary 
features.


  What does the flib directory mean, in the context of Linux?  If 
  someone were
  to add code to this driver, how do they know if the code should go 
  into the
  flib directory or not?
  
   
   +#define iowrite32be(val, addr)   out_be32((*addr), 
   val)
   +#define ioread32be(addr) in_be32((*addr))
  
  iowrite32be()/ioread32be() are already defined for all relevant 
  architectures.
  Why are you redefining them into something PPC- specific?
  
 
 Removed those. 

Please don't stop at the specific things I'm pointing out.

   +/* do not change! if changed, must be disabled for rev1 ! */ 
   #define
   +DEFAULT_HALT_ON_EXTERNAL_ACTIVATION  false
   +/* do not change! if changed, must be disabled for rev1 ! */ 
   #define
   +DEFAULT_HALT_ON_UNRECOVERABLE_ECC_ERROR false
  
  rev1 of what chip?
  
 
 P4080. I'll update the comments. 

No.  p4080rev1 is not supported, and in any case this is not the right 
way to select different errata for different chips.

-Scott


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] net: dsa: Allow configuration of CPU DSA port speeds/duplex

2015-06-17 Thread Vivien Didelot
Hi Andrew, All,

On 12/06/15 10:18, Andrew Lunn wrote:
 By default, DSA and CPU ports are configured to the maximum speed the
 switch supports. However there can be use cases where the peer device
 port is slower. Allow a fixed-link property to be used with the DSA
 and CPU port in the device tree, and use this information to configure
 the port.

Would it be a good idea for DSA to expose the cpu port to userspace as well?
That way, it'd be possible to use ethtool to set the port speed and duplex
mode, or dump registers (this would have saved me quite some time in dev).

Also, in my RFC for 802.1Q support [1], I assume the CPU port to be a tagged
member of each VLAN. But someone may want to add a VLAN with swp3 and swp4
only, and another VLAN with swp0, swp1 and the CPU port. Am I correct? This is
currently not possible, but with an exposed cpu interface, the user could
explicitly add the CPU port to a VLAN.

Sorry if this is a bit off-topic.

[1] https://lkml.org/lkml/2015/6/1/752

Thanks,
-v
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next v2] ipv4: include NLM_F_APPEND flag in append route notifications

2015-06-17 Thread Scott Feldman
On Wed, Jun 17, 2015 at 11:07 AM, Roopa Prabhu
ro...@cumulusnetworks.com wrote:
 From: Roopa Prabhu ro...@cumulusnetworks.com

 This patch adds NLM_F_APPEND flag to struct nlmsg_hdr-nlmsg_flags
 in newroute notifications if the route add was an append.
 (This is similar to how NLM_F_REPLACE is already part of new
 route replace notifications today)

 This helps userspace determine if the route add operation was
 an append.

 Signed-off-by: Roopa Prabhu ro...@cumulusnetworks.com
 ---
 v2: flip if condition around append and change |= to =
 (feedback from Alexander Duyck and Scott Feldman)

Is this a bug fix for net, or a new feature for net-next?  Regardless,

Acked-by: Scott Feldman sfel...@gmail.com
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 0/3 v4] changes to make ipv4 routing table aware of next-hop link status

2015-06-17 Thread Andy Gospodarek
On Wed, Jun 17, 2015 at 09:52:11PM +0200, Nicolas Dichtel wrote:
 Le 17/06/2015 21:10, Andy Gospodarek a écrit :
 On Wed, Jun 17, 2015 at 06:05:32PM +0200, Nicolas Dichtel wrote:
 Le 15/06/2015 18:33, Andy Gospodarek a écrit :
 This series adds the ability to have the Linux kernel track whether or
 not a particular route should be used based on the link-status of the
 interface associated with the next-hop.
 
 Before this patch any link-failure on an interface that was serving as a
 gateway for some systems could result in those systems being isolated
 from the rest of the network as the stack would continue to attempt to
 send frames out of an interface that is actually linked-down.  When the
 kernel is responsible for all forwarding, it should also be responsible
 for taking action when the traffic can no longer be forwarded -- there
 is no real need to outsource link-monitoring to userspace anymore.
 
 This feature is only enabled with the new per-interface or ipv4 global
 sysctls called 'ignore_routes_with_linkdown'.
 
 net.ipv4.conf.all.ignore_routes_with_linkdown = 0
 net.ipv4.conf.default.ignore_routes_with_linkdown = 0
 net.ipv4.conf.lo.ignore_routes_with_linkdown = 0
 ...
 Sorry for my late reply, but is it possible to advertise this sysctl via the
 netconf infra (grep NETCONFA_)?
 I would be happy to do that.  Do you see it as a requirement for this
 set to be accepted?
 No, but it would be great to do it before net-next closes.
Will do.

 
 
 I've got to add ipv6 support, so netconf support could also easily be
 added in the kernel and iproute2.
 Ok.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: macb napi strange behavior

2015-06-17 Thread Florian Fainelli
2015-06-17 12:00 GMT-07:00 Nicolae Rosia nicolae.ro...@gmail.com:
 Hi,

 On Wed, Jun 17, 2015 at 9:54 PM, Jaeden Amero jaeden.am...@ni.com wrote:
 On 06/17/2015 11:09 AM, Nicolae Rosia wrote:
 The times we've seen tons of interrupts on Ethernet with interrupts
 routed through the PL was when the FPGA was unprogrammed (or in the
 process of being reprogrammed), or was configured with the interrupt
 line tied to asserted.

 In the latter case, Linux would eventually stop handling any more
 interrupts for that port due to the interrupt storm.

 This isn't the case. The FPGA is programmed, and indeed I'm using the
 second MAC routed through PL to SFP.
 The interesting thing is that I'm seeing the exact behavior on the
 other side (another Zynq7 board), with eth0 having lots of interrupts.
 It seems that the interface receiving packets doesn't have a high IRQ
 activity in contrast to the one sending packets.

Typically, NAPI is used at the receive side of the Ethernet NIC/driver
to lower the hard/soft interrupt context switch, although there is
nothing that prevents you from implementing a similar scheme for the
transmit side. Usually, for transmit you will be submitting one packet
for transmission and get a completion interrupt, so without interrupt
coalescing (software or hardware) you can end up with 1 interrupt per
packet transmitted.
-- 
Florian
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC V3] net: don't wait for order-3 page allocation

2015-06-17 Thread David Rientjes
On Fri, 12 Jun 2015, Vlastimil Babka wrote:

  diff --git a/net/core/skbuff.c b/net/core/skbuff.c
  index 3cfff2a..41ec022 100644
  --- a/net/core/skbuff.c
  +++ b/net/core/skbuff.c
  @@ -4398,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long
  header_len,
  
  while (order) {
  if (npages = 1  order) {
  -   page = alloc_pages(gfp_mask |
  +   page = alloc_pages((gfp_mask  ~__GFP_WAIT) |
 __GFP_COMP |
 __GFP_NOWARN |
 __GFP_NORETRY,
 
 Note that __GFP_NORETRY is weaker than ~__GFP_WAIT and thus redundant. But it
 won't hurt anything leaving it there. And you might consider __GFP_NO_KSWAPD
 instead, as I said in the other thread.
 

Yeah, I agreed with __GFP_NO_KSWAPD to avoid utilizing memory reserves for 
this.

  diff --git a/net/core/sock.c b/net/core/sock.c
  index 292f422..e9855a4 100644
  --- a/net/core/sock.c
  +++ b/net/core/sock.c
  @@ -1883,7 +1883,7 @@ bool skb_page_frag_refill(unsigned int sz, struct
  page_frag *pfrag, gfp_t gfp)
  
  pfrag-offset = 0;
  if (SKB_FRAG_PAGE_ORDER) {
  -   pfrag-page = alloc_pages(gfp | __GFP_COMP |
  +   pfrag-page = alloc_pages((gfp  ~__GFP_WAIT) | __GFP_COMP |
__GFP_NOWARN | __GFP_NORETRY,
SKB_FRAG_PAGE_ORDER);
  if (likely(pfrag-page)) {
  
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next] switchdev: change BUG_ON to WARN for attr set failure case

2015-06-17 Thread Scott Feldman
On Thu, Jun 11, 2015 at 4:27 PM, David Miller da...@davemloft.net wrote:
 From: sfel...@gmail.com
 Date: Thu, 11 Jun 2015 11:20:42 -0700

 From: Scott Feldman sfel...@gmail.com

 This particular BUG_ON condition was checking for attr set err in the
 COMMIT phase, which isn't expected (it's a driver bug if PREPARE phase is
 OK but COMMIT fails).  But BUG_ON() is too strong for this case, so change
 to WARN().  BUG_ON() would be warranted if the system was corrupted beyond
 repair, but this is not the case here.

 Signed-off-by: Scott Feldman sfel...@gmail.com

 Applied, thanks Scott.

I think this one slipped thru the cracks.  I'm not seeing it in net-next.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next] switchdev: fdb filter_dev is always NULL for self (device), so remove check

2015-06-17 Thread sfeldma
From: Scott Feldman sfel...@gmail.com

Remove the filter_dev check when dumping fdb entries, otherwise dump
returns empty list.  filter_dev is always passed as NULL when dumping fdbs
on SELF.  We want the fdbs installed on the device to be listed in the
dump.

Signed-off-by: Scott Feldman sfel...@gmail.com
Fixes: 45d4122c (switchdev: add support for fdb add/del/dump via 
switchdev_port_obj ops)
---
 net/switchdev/switchdev.c |6 --
 1 file changed, 6 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5d0f8e..7dda437 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -657,7 +657,6 @@ struct switchdev_fdb_dump {
struct switchdev_obj obj;
struct sk_buff *skb;
struct netlink_callback *cb;
-   struct net_device *filter_dev;
int idx;
 };
 
@@ -670,14 +669,10 @@ static int switchdev_port_fdb_dump_cb(struct net_device 
*dev,
u32 seq = dump-cb-nlh-nlmsg_seq;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
-   struct net_device *master = netdev_master_upper_dev_get(dev);
 
if (dump-idx  dump-cb-args[0])
goto skip;
 
-   if (master  dump-filter_dev != master)
-   goto skip;
-
nlh = nlmsg_put(dump-skb, portid, seq, RTM_NEWNEIGH,
sizeof(*ndm), NLM_F_MULTI);
if (!nlh)
@@ -731,7 +726,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct 
netlink_callback *cb,
},
.skb = skb,
.cb = cb,
-   .filter_dev = filter_dev,
.idx = idx,
};
int err;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/3] Fix Ethernet jumbo frames support for Armada 370 and 38x

2015-06-17 Thread Thomas Petazzoni
Simon,

On Wed, 17 Jun 2015 19:22:17 +0200, Simon Guinot wrote:

  You should add a Fixes: tag to each commit to indicate which commit is
  being fixed by your patches.
  
  Also, I was a bit surprised by your statement that Armada 38x is also
  affected by the problem, since Armada 38x is more recent than Armada
  XP. but indeed, according to the Armada 38x datasheet:
  
IPv4 and TCP/UDP over IPv4/IPv6 checksum generation on transmit
frames for standard Ethernet packet size
  
  While the Armada XP datasheet says:
  
Long frames transmission (including jumbo frames), with
IPv4/v6/TCP/UDP checksum generation
  
  So it seems like you're right about this!
 
 At first, I thought this was an error in the Armada 38x datasheet (maybe
 a sloppy copy/paste). Therefore I have checked on a DB-88F6820-GP
 board and then I can confirm that the Armada 38x is also affected.

Ok, so the datasheet was correct (houra!). Thanks a lot for having done
the additional testing.

Thomas
-- 
Thomas Petazzoni, CTO, Free Electrons
Embedded Linux, Kernel and Android engineering
http://free-electrons.com
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 00/15] Simplify netfilter and network namespaces

2015-06-17 Thread Eric W. Biederman
Julian Anastasov j...@ssi.bg writes:

   Hello,

 On Sun, 14 Jun 2015, Eric W. Biederman wrote:

 This patchset roots out all of the very weird network namespace
 computation logic (except for the code in ipvs) and fixes it.  I really
 don't like how the code has been essentially guessing  which network
 namespace to use.
 
 Probably the worst guessing is in ipvs in the function skb_net. I have
 some preliminary changes to fix ipvs but they are not quite ready yet.
 Cleaning up ipvs enough that I can kill skb_net is on my short list.

   For IPVS skb_net is too complicated. One of
 the first things we do in hook handler is to check
 skb_dst, so even now dev_net(skb_dst(skb)-dev) should
 work.

A couple of things:
- You don't check skb_dst() first thing in all hooks.

- All hooks have state->net after my changes so it gets
  a lot easier.

- I am trying to avoid dev_net(skb_dst(skb)->dev) if I can
  because of my ulterior motive of allowing a destination
  network device in another network namespace.

  sock_net is used for administration via netlink,
 skb_net is not used there (ip_vs_ctl.c).

Agreed.  sock_net is the right thing for the netlink sockets,
and while obscured by skb_sknet that works fine.

   As for removing/replacing skb_net, ip_vs_conn_net()
 is ok when cp is present,

Yes I have seen that.

 we have also svc-net,

And I have seen this.

  otherwise
 we can store net into struct ip_vs_iphdr, it is filled by
 ip_vs_fill_iph_skb from small number of places when hook
 handler is entered.

Interesting I had not thought of changing ip_vs_iphdr.

I actually have a patch that passes state->net down to where it is
needed or uses ip_vs_conn_net() and it looks ok, but the patch
is so busy with small tweaks that I need to break it up.

Further I am staring at things and trying to see if there is a way to
make the mass of parameters more readable/obvious.   Something like
the passing of struct nf_hook_state into the netfilter hooks.

Or in other words I want to make the change to ip_vs deadly boring,
obviously correct, and trivial to maintain, and I am not quite there
yet.

Eric

p.s.  I do have my patch that I can toss in your direction if you are
interested.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH iproute2] tests: Add output testing

2015-06-17 Thread Vadim Kochan
From: Vadim Kochan vadi...@gmail.com

Added possibility to check command output by grep from the testing
script.

Now TMP_OUT & TMP_ERR are passed from Makefile and changed to
STD_ERR & STD_OUT.

Also changed some existing tests to make output testing.

Signed-off-by: Vadim Kochan vadi...@gmail.com
---
 testsuite/Makefile   |  4 ++
 testsuite/lib/generic.sh | 80 
 testsuite/tests/ip/link/new_link.t   |  4 ++
 testsuite/tests/ip/route/add_default_route.t | 23 +++-
 4 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/testsuite/Makefile b/testsuite/Makefile
index 4b945b0..2027650 100644
--- a/testsuite/Makefile
+++ b/testsuite/Makefile
@@ -52,6 +52,9 @@ endif
@for i in $(IPVERS); do \
o=`echo $$i | sed -e 's/iproute2\///'`; \
echo -n Running $@ [$$o/`uname -r`]: ; \
+   TMP_ERR=`mktemp /tmp/tc_testsuite.XX`; \
+   TMP_OUT=`mktemp /tmp/tc_testsuite.XX`; \
+   STD_ERR=$$TMP_ERR STD_OUT=$$TMP_OUT \
TC=$$i/tc/tc IP=$$i/ip/ip DEV=$(DEV) IPVER=$@ 
SNAME=$$i \
ERRF=$(RESULTS_DIR)/$@.$$o.err $(KENV) $(PREFIX) tests/$@  
$(RESULTS_DIR)/$@.$$o.out; \
if [ $$? = 127 ]; then \
@@ -61,5 +64,6 @@ endif
else \
echo PASS; \
fi; \
+   rm $$TMP_ERR $$TMP_OUT; \
dmesg  $(RESULTS_DIR)/$@.$$o.dmesg; \
done
diff --git a/testsuite/lib/generic.sh b/testsuite/lib/generic.sh
index 3473cc1..b7de704 100644
--- a/testsuite/lib/generic.sh
+++ b/testsuite/lib/generic.sh
@@ -30,57 +30,49 @@ ts_tc()
 {
SCRIPT=$1; shift
DESC=$1; shift
-   TMP_ERR=`mktemp /tmp/tc_testsuite.XX` || exit
-   TMP_OUT=`mktemp /tmp/tc_testsuite.XX` || exit
 
-   $TC $@ 2 $TMP_ERR  $TMP_OUT
+   $TC $@ 2 $STD_ERR  $STD_OUT
 
-   if [ -s $TMP_ERR ]; then
+   if [ -s $STD_ERR ]; then
ts_err ${SCRIPT}: ${DESC} failed:
ts_err command: $TC $@
ts_err stderr output:
-   ts_err_cat $TMP_ERR
-   if [ -s $TMP_OUT ]; then
+   ts_err_cat $STD_ERR
+   if [ -s $STD_OUT ]; then
ts_err stdout output:
-   ts_err_cat $TMP_OUT
+   ts_err_cat $STD_OUT
fi
-   elif [ -s $TMP_OUT ]; then
+   elif [ -s $STD_OUT ]; then
echo ${SCRIPT}: ${DESC} succeeded with output:
-   cat $TMP_OUT
+   cat $STD_OUT
else
echo ${SCRIPT}: ${DESC} succeeded
fi
-
-   rm $TMP_ERR $TMP_OUT
 }
 
 ts_ip()
 {
SCRIPT=$1; shift
DESC=$1; shift
-   TMP_ERR=`mktemp /tmp/tc_testsuite.XX` || exit
-   TMP_OUT=`mktemp /tmp/tc_testsuite.XX` || exit
 
-   $IP $@ 2 $TMP_ERR  $TMP_OUT
+   $IP $@ 2 $STD_ERR  $STD_OUT
 RET=$?
 
-   if [ -s $TMP_ERR ] || [ $RET != 0 ]; then
+   if [ -s $STD_ERR ] || [ $RET != 0 ]; then
ts_err ${SCRIPT}: ${DESC} failed:
ts_err command: $IP $@
ts_err stderr output:
-   ts_err_cat $TMP_ERR
-   if [ -s $TMP_OUT ]; then
+   ts_err_cat $STD_ERR
+   if [ -s $STD_OUT ]; then
ts_err stdout output:
-   ts_err_cat $TMP_OUT
+   ts_err_cat $STD_OUT
fi
-   elif [ -s $TMP_OUT ]; then
+   elif [ -s $STD_OUT ]; then
echo ${SCRIPT}: ${DESC} succeeded with output:
-   cat $TMP_OUT
+   cat $STD_OUT
else
echo ${SCRIPT}: ${DESC} succeeded
fi
-
-   rm $TMP_ERR $TMP_OUT
 }
 
 ts_qdisc_available()
@@ -97,3 +89,47 @@ rand_dev()
 {
 echo dev-$(tr -dc [:alpha:]  /dev/urandom | head -c 6)
 }
+
+pr_failed()
+{
+   echo  [FAILED]
+   ts_err matching failed
+}
+
+pr_success()
+{
+   echo  [SUCCESS]
+}
+
+test_on()
+{
+   echo -n test on: \$1\
+   if cat $STD_OUT | grep -qE $1
+   then
+   pr_success
+   else
+   pr_failed
+   fi
+}
+
+test_on_not()
+{
+   echo -n test on: \$1\
+   if cat $STD_OUT | grep -vqE $1
+   then
+   pr_success
+   else
+   pr_failed
+   fi
+}
+
+test_lines_count()
+{
+   echo -n test on lines count ($1): 
+   if cat $STD_OUT | wc -l | grep -q $1
+   then
+   pr_success
+   else
+   pr_failed
+   fi
+}
diff --git a/testsuite/tests/ip/link/new_link.t 
b/testsuite/tests/ip/link/new_link.t
index 549ff25..699adbc 100755
--- a/testsuite/tests/ip/link/new_link.t
+++ b/testsuite/tests/ip/link/new_link.t
@@ -7,5 +7,9 @@ ts_log [Testing add/del virtual links]
 NEW_DEV=$(rand_dev)
 
 ts_ip $0 Add $NEW_DEV dummy interface  link add dev $NEW_DEV 

[PATCH 1/4] net: stmmac: dwmac-rk: remove unused gpio register defines

2015-06-17 Thread Heiko Stuebner
In a first version the driver did want to do some gpio wiggling, which
of course never made it into the kernel, but somehow these register
defines were forgotten. Remove them, as they shouldn't be here.

Signed-off-by: Heiko Stuebner he...@sntech.de
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 30e28f0..49c7715 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -65,9 +65,6 @@ struct rk_priv_data {
 
 #define RK3288_GRF_SOC_CON10x0248
 #define RK3288_GRF_SOC_CON30x0250
-#define RK3288_GRF_GPIO3D_E0x01ec
-#define RK3288_GRF_GPIO4A_E0x01f0
-#define RK3288_GRF_GPIO4B_E0x01f4
 
 /*RK3288_GRF_SOC_CON1*/
 #define GMAC_PHY_INTF_SEL_RGMII(GRF_BIT(6) | GRF_CLR_BIT(7) | 
GRF_CLR_BIT(8))
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4] net: stmmac: dwmac-rk: add support for rk3368

2015-06-17 Thread Heiko Stuebner
Apart from small cleanups, this series provides support for the dwmac
on the new rk3368 ARM64 soc.

Tested on a R88 board using a RMII phy.

Heiko Stuebner (4):
  net: stmmac: dwmac-rk: remove unused gpio register defines
  net: stmmac: dwmac-rk: Fix clk rate when provided by soc
  net: stmmac: dwmac-rk: abstract access to mac settings in the GRF
  net: stmmac: dwmac-rk: add rk3368-specific data

 .../devicetree/bindings/net/rockchip-dwmac.txt |   2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 248 +
 2 files changed, 203 insertions(+), 47 deletions(-)

-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] ipv4: include NLM_F_APPEND flag in append route notifications

2015-06-17 Thread roopa

On 6/17/15, 10:31 AM, Alexander Duyck wrote:


I'd say go with something closer to the original patch, but flip the 
logic like you have here, and lose the |= in favor of an = since 
you are either sending a message with 0 or NLM_F_APPEND.


Anyway that is just my $.02.

agreed,

thanks alex, v2 posted..
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net-next v2] ipv4: include NLM_F_APPEND flag in append route notifications

2015-06-17 Thread Roopa Prabhu
From: Roopa Prabhu ro...@cumulusnetworks.com

This patch adds NLM_F_APPEND flag to struct nlmsg_hdr->nlmsg_flags
in newroute notifications if the route add was an append.
(This is similar to how NLM_F_REPLACE is already part of new
route replace notifications today)

This helps userspace determine if the route add operation was
an append.

Signed-off-by: Roopa Prabhu ro...@cumulusnetworks.com
---
v2: flip if condition around append and change |= to =
(feedback from Alexander Duyck and Scott Feldman)

 net/ipv4/fib_trie.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3c699c4..6c666a9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1082,6 +1082,7 @@ int fib_table_insert(struct fib_table *tb, struct 
fib_config *cfg)
struct trie *t = (struct trie *)tb-tb_data;
struct fib_alias *fa, *new_fa;
struct key_vector *l, *tp;
+   unsigned int nlflags = 0;
struct fib_info *fi;
u8 plen = cfg-fc_dst_len;
u8 slen = KEYLENGTH - plen;
@@ -1201,7 +1202,9 @@ int fib_table_insert(struct fib_table *tb, struct 
fib_config *cfg)
if (fa_match)
goto out;
 
-   if (!(cfg-fc_nlflags  NLM_F_APPEND))
+   if (cfg-fc_nlflags  NLM_F_APPEND)
+   nlflags = NLM_F_APPEND;
+   else
fa = fa_first;
}
err = -ENOENT;
@@ -1238,7 +1241,7 @@ int fib_table_insert(struct fib_table *tb, struct 
fib_config *cfg)
 
rt_cache_flush(cfg-fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa-tb_id,
- cfg-fc_nlinfo, 0);
+ cfg-fc_nlinfo, nlflags);
 succeeded:
return 0;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: macb napi strange behavior

2015-06-17 Thread Nicolae Rosia
Hi,

On Wed, Jun 17, 2015 at 9:54 PM, Jaeden Amero jaeden.am...@ni.com wrote:
 On 06/17/2015 11:09 AM, Nicolae Rosia wrote:
 The times we've seen tons of interrupts on Ethernet with interrupts
 routed through the PL was when the FPGA was unprogrammed (or in the
 process of being reprogrammed), or was configured with the interrupt
 line tied to asserted.

 In the latter case, Linux would eventually stop handling any more
 interrupts for that port due to the interrupt storm.

This isn't the case. The FPGA is programmed, and indeed I'm using the
second MAC routed through PL to SFP.
The interesting thing is that I'm seeing the exact behavior on the
other side (another Zynq7 board), with eth0 having lots of interrupts.
It seems that the interface receiving packets doesn't have a high IRQ
activity in contrast to the one sending packets.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next v2] ipv4: include NLM_F_APPEND flag in append route notifications

2015-06-17 Thread roopa

On 6/17/15, 11:30 AM, Scott Feldman wrote:

On Wed, Jun 17, 2015 at 11:07 AM, Roopa Prabhu
ro...@cumulusnetworks.com wrote:

From: Roopa Prabhu ro...@cumulusnetworks.com

This patch adds NLM_F_APPEND flag to struct nlmsg_hdr->nlmsg_flags
in newroute notifications if the route add was an append.
(This is similar to how NLM_F_REPLACE is already part of new
route replace notifications today)

This helps userspace determine if the route add operation was
an append.

Signed-off-by: Roopa Prabhu ro...@cumulusnetworks.com
---
v2: flip if condition around append and change |= to =
(feedback from Alexander Duyck and Scott Feldman)

Is this a bug fix for net, or a new feature for net-next?  Regardless,

Acked-by: Scott Feldman sfel...@gmail.com

I could not decide between net and net-next.
but the patch applies cleanly against net if Dave decides to pick it 
up for net.

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 43/43] netfilter: Skip unnecessary calls to synchronize_net

2015-06-17 Thread Eric W. Biederman
Patrick McHardy ka...@trash.net writes:

 On 17.06, Eric W. Biederman wrote:
 From: Eric W Biederman ebied...@xmission.com
 
 Signed-off-by: Eric W. Biederman ebied...@xmission.com
 ---
  net/netfilter/core.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/net/netfilter/core.c b/net/netfilter/core.c
 index 95456c09cf69..1b4eadc9c030 100644
 --- a/net/netfilter/core.c
 +++ b/net/netfilter/core.c
 @@ -134,7 +134,9 @@ void nf_unregister_hook(struct net *net, const struct 
 nf_hook_ops *reg)
  #ifdef HAVE_JUMP_LABEL
  static_key_slow_dec(nf_hooks_needed[reg-pf][reg-hooknum]);
  #endif
 -synchronize_net();
 +/* Don't wait if there are no packets in flight */
 +if (net-loopback_dev)
 +synchronize_net();

 I don't get this, could you please explain why there wouldn't be any packets
 in flight if there is no loopback_dev?

The simplified version.

Typical netfilter module:
 static struct nf_hook_ops mod_ops[] __read_mostly = {
   ...
 };
 static int __net_init mod_init(struct net *net)
 {
 return nf_register_hooks(net, mod_ops, ARRAY_SIZE(mod_ops));
 }
 static void __net_exit mod_net_exit(struct net *net)
 {
   nf_unregister_hooks(net, mod_ops, ARRAY_SIZE(mod_ops));
 }
 static struct pernet_operations mod_net_ops = {
   .init =  mod_net_init,
   .exit =  mod_net_exit,
 };
 static int __init mod_init(void)
 {
   return register_pernet_subsys(mod_net_ops);
 }
 static void __exit mod_fini(void)
 {
   unregister_pernet_subsys(mod_net_ops);
 }

Which means there are essentially two times when nf_unregister_hook is
called:
- At some random time when the netfilter module is being removed.
- From unregister_pernet_subsys.

It is an invariant of subsys code called from the network namespace
exit path that no packets are in flight.  Without that invariant
it is a nightmare to clean up data structures, etc.

The last thing that happens before the network namespace subsystem
exit routines are called is the loopback device is freed.

That happens in default_device_exit_batch in the rtnl_unlock() call.


Another way to look at it is that:  I know there are no user space
sockets, and I know that there are no networking devices by the time
the loopback device is freed as part of network namespace clean up.
Which removes all sources of packets.

Additionally we have to perform all of the rcu waits before we can free
any network device and with the loopback device being the last network
device freed those waits have all been performed.


The network stack also goes kaboom in the most interesting ways when we
goof up and violate the rules that guarantee that packets are not in
flight when unregistering.  I have not seen that in years.


And my older documentation in net_namespace.h says:
 /*
  * Use these carefully.  If you implement a network device and it
  * needs per network namespace operations use device pernet operations,
  * otherwise use pernet subsys operations.
  *
  * Network interfaces need to be removed from a dying netns _before_
  * subsys notifiers can be called, as most of the network code cleanup
  * (which is done from subsys notifiers) runs with the assumption that
  * dev_remove_pack has been called so no new packets will arrive during
  * and after the cleanup functions have been called.  dev_remove_pack
  * is not per namespace so instead the guarantee of no more packets
  * arriving in a network namespace is provided by ensuring that all
  * network devices and all sockets have left the network namespace
  * before the cleanup methods are called.
  *
  * For the longest time the ipv4 icmp code was registered as a pernet
  * device which caused kernel oops, and panics during network
  * namespace cleanup.   So please don't get this wrong.
  */
 int register_pernet_subsys(struct pernet_operations *);
 void unregister_pernet_subsys(struct pernet_operations *);
 int register_pernet_device(struct pernet_operations *);
 void unregister_pernet_device(struct pernet_operations *);


Another more explicit way of looking at this in the context of
netfilter:
- The loopback device is freed (which is the last network device to be
  freed) so there are no more network devices.
- Therefore the netfilter tracepoints can no longer be called.
- Therefore the worst we are dealing with is someone accessing
  a network device via an rcu.
- The rcu_barrier() after NETDEV_UNREGISTER in netdev_run_todo()
  is there so that a network device can assume that it has no rcu
  references from packets or other things.
- We know the loopback device and all other network devices before it 
  have passed that rcu_barrier in netdev_run_todo().

- Or in short: loopback_dev == NULL means we have no network devices and
  an rcu grace period has elapsed since the last network device was
  freed.

Therefore loopback_dev == NULL is a strong and reliable indication that
we don't have any packets in flight in a network namespace.

Hmm.  I wonder if we might want to 

Re: [PATCH net-next 00/15] Simplify netfilter and network namespaces

2015-06-17 Thread Julian Anastasov

Hello,

On Wed, 17 Jun 2015, Eric W. Biederman wrote:

 p.s.  I do have my patch that I can toss in your direction if you are
 interested.

Of course... I'll be able to check it after 8 hours...

Regards

--
Julian Anastasov j...@ssi.bg
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 4/5] rocker: add offload_fwd_mark support

2015-06-17 Thread sfeldma
From: Scott Feldman sfel...@gmail.com

If device flags ingress packet as fwd offload, mark the
skb->offload_fwd_mark using the ingress port's dev->offload_fwd_mark.  This
will be the hint to the kernel that this packet has already been forwarded
by device to egress ports matching skb->offload_fwd_mark.

For rocker, derive port dev->offload_fwd_mark based on device switch ID and
port ifindex.  If port is bridged, use the bridge ifindex rather than the
port ifindex.

Signed-off-by: Scott Feldman sfel...@gmail.com
---
 drivers/net/ethernet/rocker/rocker.c |   14 +-
 drivers/net/ethernet/rocker/rocker.h |1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
index d4ec660..b72674c 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4701,6 +4701,7 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
const struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
size_t rx_len;
+   u16 rx_flags = 0;
 
if (!skb)
return -ENOENT;
@@ -4708,6 +4709,8 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
return -EINVAL;
+   if (attrs[ROCKER_TLV_RX_FLAGS])
+   rx_flags = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FLAGS]);
 
rocker_dma_rx_ring_skb_unmap(rocker, attrs);
 
@@ -4715,6 +4718,9 @@ static int rocker_port_rx_proc(const struct rocker 
*rocker,
skb_put(skb, rx_len);
skb-protocol = eth_type_trans(skb, rocker_port-dev);
 
+   if (rx_flags  ROCKER_RX_FLAGS_FWD_OFFLOAD)
+   skb-offload_fwd_mark = rocker_port-dev-offload_fwd_mark;
+
rocker_port-dev-stats.rx_packets++;
rocker_port-dev-stats.rx_bytes += skb-len;
 
@@ -4852,11 +4858,13 @@ static int rocker_probe_port(struct rocker *rocker, 
unsigned int port_number)
}
rocker-ports[port_number] = rocker_port;
 
+   switchdev_port_fwd_mark_set(rocker_port-dev, NULL, false);
+
rocker_port_set_learning(rocker_port, SWITCHDEV_TRANS_NONE);
 
err = rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, 0);
if (err) {
-   dev_err(pdev-dev, install ig port table failed\n);
+   netdev_err(rocker_port-dev, install ig port table failed\n);
goto err_port_ig_tbl;
}
 
@@ -4876,6 +4884,7 @@ err_untagged_vlan:
rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE,
   ROCKER_OP_FLAG_REMOVE);
 err_port_ig_tbl:
+   rocker-ports[port_number] = NULL;
unregister_netdev(dev);
 err_register_netdev:
free_netdev(dev);
@@ -5131,6 +5140,7 @@ static int rocker_port_bridge_join(struct rocker_port 
*rocker_port,
rocker_port_internal_vlan_id_get(rocker_port, bridge-ifindex);
 
rocker_port-bridge_dev = bridge;
+   switchdev_port_fwd_mark_set(rocker_port-dev, bridge, true);
 
return rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
untagged_vid, 0);
@@ -5151,6 +5161,8 @@ static int rocker_port_bridge_leave(struct rocker_port 
*rocker_port)
rocker_port_internal_vlan_id_get(rocker_port,
 rocker_port-dev-ifindex);
 
+   switchdev_port_fwd_mark_set(rocker_port-dev, rocker_port-bridge_dev,
+   false);
rocker_port-bridge_dev = NULL;
 
err = rocker_port_vlan_add(rocker_port, SWITCHDEV_TRANS_NONE,
diff --git a/drivers/net/ethernet/rocker/rocker.h 
b/drivers/net/ethernet/rocker/rocker.h
index c61fbf9..f846c0d 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -245,6 +245,7 @@ enum {
 #define ROCKER_RX_FLAGS_TCPBIT(5)
 #define ROCKER_RX_FLAGS_UDPBIT(6)
 #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD  BIT(7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOADBIT(8)
 
 enum {
ROCKER_TLV_TX_UNSPEC,
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 1/5] net: don't reforward packets already forwarded by offload device

2015-06-17 Thread sfeldma
From: Scott Feldman sfel...@gmail.com

Just before queuing skb for xmit on port, check if skb has been marked by
switchdev port driver as already forwarded by device.  If so, drop skb.  A
non-zero skb->offload_fwd_mark field is set by the switchdev port
driver/device on ingress to indicate the skb has already been forwarded by
the device to egress ports with matching dev->offload_fwd_mark.  The
switchdev port driver would assign a non-zero dev->offload_fwd_mark for each
device port netdev during registration, for example.

Signed-off-by: Scott Feldman sfel...@gmail.com
---
 include/linux/netdevice.h |6 ++
 include/linux/skbuff.h|4 
 net/core/dev.c|   10 ++
 3 files changed, 20 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e20979d..7be616e1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1448,6 +1448,8 @@ enum netdev_priv_flags {
  *
  * @xps_maps:  XXX: need comments on this one
  *
+ * @offload_fwd_mark:  Offload device fwding mark
+ *
  * @trans_start:   Time (in jiffies) of last Tx
  * @watchdog_timeo:Represents the timeout that is used by
  * the watchdog ( see dev_watchdog() )
@@ -1685,6 +1687,10 @@ struct net_device {
struct xps_dev_maps __rcu *xps_maps;
 #endif
 
+#ifdef CONFIG_NET_SWITCHDEV
+   u32 offload_fwd_mark;
+#endif
+
/* These may be needed for future network-power-down code. */
 
/*
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e..1533c4f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct 
skb_mstamp *t1,
  * @no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
   *@napi_id: id of the NAPI struct this skb came from
  * @secmark: security marking
+ * @offload_fwd_mark: fwding offload mark
  * @mark: Generic packet mark
  * @vlan_proto: vlan encapsulation protocol
  * @vlan_tci: vlan tag control information
@@ -653,6 +654,9 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
__u32   secmark;
 #endif
+#ifdef CONFIG_NET_SWITCHDEV
+   __u32   offload_fwd_mark;
+#endif
union {
__u32   mark;
__u32   reserved_tailroom;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6778a99..9eb517e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3065,6 +3065,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void 
*accel_priv)
else
skb_dst_force(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+   /* Don't forward if offload device already forwarded */
+   if (skb-offload_fwd_mark 
+   skb-offload_fwd_mark == dev-offload_fwd_mark) {
+   consume_skb(skb);
+   rc = NET_XMIT_SUCCESS;
+   goto out;
+   }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq-qdisc);
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 3/5] switchdev: add offload_fwd_mark generator helper

2015-06-17 Thread sfeldma
From: Scott Feldman sfel...@gmail.com

skb->offload_fwd_mark and dev->offload_fwd_mark are 32-bit and should be
unique for device and may even be unique for a sub-set of ports within
device, so add switchdev helper function to generate unique marks based on
port's switch ID and group_ifindex.  group_ifindex would typically be the
container dev's ifindex, such as the bridge's ifindex.

The generator uses a global hash table to store offload_fwd_marks hashed by
{switch ID, group_ifindex} key.

Signed-off-by: Scott Feldman sfel...@gmail.com
---
 include/net/switchdev.h   |9 
 net/switchdev/switchdev.c |  103 +
 2 files changed, 112 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 437f8fe..d882902 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -157,6 +157,9 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr 
*tb[],
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev,
struct net_device *filter_dev, int idx);
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+struct net_device *group_dev,
+bool joining);
 
 #else
 
@@ -271,6 +274,12 @@ static inline int switchdev_port_fdb_dump(struct sk_buff 
*skb,
return -EOPNOTSUPP;
 }
 
+static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
+  struct net_device *group_dev,
+  bool joining)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 00c67a5..6cb30bf 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -920,3 +920,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi-fib_net-ipv4.fib_offload_disabled = true;
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+static bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
+{
+   struct switchdev_attr a_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+   struct switchdev_attr b_attr = {
+   .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+   .flags = SWITCHDEV_F_NO_RECURSE,
+   };
+
+   if (switchdev_port_attr_get(a, a_attr) ||
+   switchdev_port_attr_get(b, b_attr))
+   return false;
+
+   return netdev_phys_item_id_same(a_attr.u.ppid, b_attr.u.ppid);
+}
+
+static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
+  struct net_device *group_dev)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev == dev)
+   continue;
+   if (switchdev_port_same_parent_id(dev, lower_dev))
+   return lower_dev-offload_fwd_mark;
+   return switchdev_port_fwd_mark_get(dev, lower_dev);
+   }
+
+   return dev-ifindex;
+}
+
+static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
+ u32 old_mark, u32 *reset_mark)
+{
+   struct net_device *lower_dev;
+   struct list_head *iter;
+
+   netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+   if (lower_dev-offload_fwd_mark == old_mark) {
+   if (!*reset_mark)
+   *reset_mark = lower_dev-ifindex;
+   lower_dev-offload_fwd_mark = *reset_mark;
+   }
+   switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
+   }
+}
+
+/**
+ * switchdev_port_fwd_mark_set - Set port offload forwarding mark
+ *
+ * @dev: port device
+ * @group_dev: containing device
+ * @joining: true if dev is joining group; false if leaving group
+ *
+ * An ungrouped port's offload mark is just its ifindex.  A grouped
+ * port's (member of a bridge, for example) offload mark is the ifindex
+ * of one of the ports in the group with the same parent (switch) ID.
+ * Ports on the same device in the same group will have the same mark.
+ *
+ * Example:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=5
+ *   sw2p2 ifindex=5   mark=5
+ *
+ * If sw2p2 leaves the bridge, we'll have:
+ *
+ * br0 ifindex=9
+ *   sw1p1 ifindex=2   mark=2
+ *   sw1p2 ifindex=3   mark=2
+ *   sw2p1 ifindex=4   mark=4
+ * sw2p2   

[RFC PATCH net-next v2 2/5] net: add phys ID compare helper to test if two IDs are the same

2015-06-17 Thread sfeldma
From: Scott Feldman sfel...@gmail.com

Signed-off-by: Scott Feldman sfel...@gmail.com
---
 include/linux/netdevice.h |7 +++
 net/switchdev/switchdev.c |8 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7be616e1..63090ce 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,13 @@ struct netdev_phys_item_id {
unsigned char id_len;
 };
 
+static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
+   struct netdev_phys_item_id *b)
+{
+   return ((a-id_len == b-id_len) 
+   (memcmp(a-id, b-id, a-id_len) == 0));
+}
+
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
   struct sk_buff *skb);
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5d0f8e..00c67a5 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -791,13 +791,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct 
fib_info *fi)
if (switchdev_port_attr_get(dev, attr))
return NULL;
 
-   if (nhsel  0) {
-   if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
+   if (nhsel  0 
+   !netdev_phys_item_id_same(prev_attr.u.ppid, attr.u.ppid))
return NULL;
-   if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
-  attr.u.ppid.id_len))
-   return NULL;
-   }
 
prev_attr = attr;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 5/5] switchdev: update documentation for offload_fwd_mark

2015-06-17 Thread sfeldma
From: Scott Feldman sfel...@gmail.com

Signed-off-by: Scott Feldman sfel...@gmail.com
---
 Documentation/networking/switchdev.txt |   14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
index c5d7ade..b864e47 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -279,8 +279,18 @@ and unknown unicast packets to all ports in domain, if 
allowed by port's
 current STP state.  The switch driver, knowing which ports are within which
 vlan L2 domain, can program the switch device for flooding.  The packet should
 also be sent to the port netdev for processing by the bridge driver.  The
-bridge should not reflood the packet to the same ports the device flooded.
-XXX: the mechanism to avoid duplicate flood packets is being discuseed.
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the device/driver can mark a packet as already
+forwarded using skb-offload_fwd_mark.  The same mark is set on the device
+ports in the domain using dev-offload_fwd_mark.  If the skb-offload_fwd_mark
+is non-zero and matches the forwarding egress port's dev-skb_mark, the kernel
+will drop the skb right before transmit on the egress port, with the
+understanding that the device already forwarded the packet on same egress port.
+The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
+for port's dev-offload_fwd_mark, based on the port's parent ID (switch ID) and
+a group ifindex.
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH net-next v2 0/5] switchdev: avoid duplicate packet forwarding

2015-06-17 Thread sfeldma
From: Scott Feldman sfel...@gmail.com

(RFC because we're at rc7+ now)

v2:

 - s/fwd_mark/offload_fwd_mark
 - use consume_skb rather than kfree_skb when dropping pkt on egress.
 - Use Jiri's suggestion to use ifindex of one of the ports in a group
   as the mark for all the ports in the group.  This can be done with
   no additional storage (no hashtable from v1).  To pull it off, we
   need some simple recursive routines to walk the netdev tree ensuring
   all leaves in the tree (ports) in the same group (e.g. bridge)
   belonging to the same switch device will have the same offload fwd mark.
   Maybe someone sees a better design for the recursive routines?  They're
   not too bad, and should cover the stacked driver cases.

v1:

With switchdev support for offloading L2/L3 forwarding data path to a
switch device, we have a general problem where both the device and the
kernel may forward the packet, resulting in duplicate packets on the wire.
Anytime a packet is forwarded by the device and a copy is sent to the CPU,
there is potential for duplicate forwarding, as the kernel may also do a
forwarding lookup and send the packet on the wire.

The specific problem this patch series is interested in solving is avoiding
duplicate packets on bridged ports.  There was a previous RFC from Roopa
(http://marc.info/?l=linux-netdev&m=142687073314252&w=2) to address this
problem, but didn't solve the problem of mixed ports in the bridge from
different devices; there was no way to exclude some ports from forwarding
and include others.  This RFC solves that problem by tagging the ingressing
packet with a unique mark, and then comparing the packet mark with the
egress port mark, and skip forwarding when there is a match.  For the mixed
ports bridge case, only those ports with matching marks are skipped.

The switchdev port driver must do two things:

1) Generate a fwd_mark for each switch port, using some unique key of the
   switch device (and optionally port).  This is done when the port netdev
   is registered or if the port's group membership changes (joins/leaves
   a bridge, for example).

2) On packet ingress from port, mark the skb with the ingress port's
   fwd_mark.  If the device supports it, it's useful to only mark skbs
   which were already forwarded by the device.  If the device does not
   support such indication, all skbs can be marked, even if they're
   local dst.

Two new 32-bit fields are added to struct sk_buff and struct netdevice to
hold the fwd_mark.  I've wrapped these with CONFIG_NET_SWITCHDEV for now. I
tried using skb->mark for this purpose, but ebtables can overwrite the
skb->mark before the bridge gets it, so that will not work.

In general, this fwd_mark can be used for any case where a packet is
forwarded by the device and a copy is sent to the CPU, to avoid the kernel
re-forwarding the packet.  sFlow is another use-case that comes to mind,
but I haven't explored the details.



Scott Feldman (5):
  net: don't reforward packets already forwarded by offload device
  net: add phys ID compare helper to test if two IDs are the same
  switchdev: add offload_fwd_mark generator helper
  rocker: add offload_fwd_mark support
  switchdev: update documentation for offload_fwd_mark

 Documentation/networking/switchdev.txt |   14 +++-
 drivers/net/ethernet/rocker/rocker.c   |   14 +++-
 drivers/net/ethernet/rocker/rocker.h   |1 +
 include/linux/netdevice.h  |   13 
 include/linux/skbuff.h |4 ++
 include/net/switchdev.h|9 +++
 net/core/dev.c |   10 +++
 net/switchdev/switchdev.c  |  111 ++--
 8 files changed, 167 insertions(+), 9 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/3] net: mvneta: introduce compatible string marvell, armada-xp-neta

2015-06-17 Thread Jason Cooper
Hey Thomas,

On Wed, Jun 17, 2015 at 10:43:12PM +0200, Thomas Petazzoni wrote:
 On Wed, 17 Jun 2015 17:01:12 +, Jason Cooper wrote:
 
  I disagree with this.  We can't predict what inconsistencies we'll discover 
  in
  the future.  We should only assign new compatible strings based on known IP
  variations when we discover them.  This seems fraught with demons since we
  can't predict the scope of affected IP blocks (some steppings of one SoC, 
  three
  SoCs plus two steppings of a fourth, etc)
  
  imho, the 'future-proofing' lies in being specific as to the naming of the
  compatible strings against known hardware variations at the time.
 
 Except that this clearly doesn't work, and the case raised by Simon is
 a perfect illustration of why planning ahead is beneficial. 

Odd, I'd use that as an example of the process working.  ;-)  we have
everyone using 'armada-370-neta' for a given block.  We discovered that
the original IP block (on the 370s) had a limitation (no hw checksum
for greater than 1600 bytes).  A newer version of the IP block (XP)
doesn't have the limitation.

So we change the driver to honor the limit for the 370 compatible
string.  We create a new compatible string for xp where the block
doesn't have the limitation.

How did the process fail?

 We already had the issue several times on mvebu platforms, so it
 should really become the rule to have one compatible string specific
 to the actual SoC in the list of compatible strings.

Sorry, I'm just not a fan of guessing.  But I'll fall back to the DT
maintainers on this one.  If they are OK with it, then I'll drop my
objection.

 Not doing so requires breaking DT backward compatibility more often, so
 wanting DT backward compatibility and not wanting to plan ahead is a
 bit antagonist.

I'm not seeing where backwards compatibility was broken?  A device with
an old dtb booting a newer kernel gets a bugfix.  In the case of an XP
board with an old dtb (armada-370-neta), the hardware still works, but
not optimally.  Upgrading the dtb will enable hw checksumming for jumbo
packets.

thx,

Jason.
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] net: stmmac: dwmac-rk: add rk3368-specific data

2015-06-17 Thread Heiko Stuebner
Add constants and callback functions for the dwmac on rk3368 socs.
As can be seen, the base structure is the same, only registers and
the bits in them moved slightly.

Signed-off-by: Heiko Stuebner he...@sntech.de
---
 .../devicetree/bindings/net/rockchip-dwmac.txt |   2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 126 +
 2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt 
b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index 21fd199..93eac7c 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -3,7 +3,7 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC)
 The device node has following properties.
 
 Required properties:
- - compatible: Can be rockchip,rk3288-gmac.
+ - compatible: Can be one of rockchip,rk3288-gmac, rockchip,rk3368-gmac
  - reg: addresses and length of the register sets for the device.
  - interrupts: Should contain the GMAC interrupts.
  - interrupt-names: Should contain the interrupt names macirq.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 7ae17c6..fe08ee7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -184,6 +184,118 @@ struct rk_gmac_ops rk3288_ops = {
.set_rmii_speed = rk3288_set_rmii_speed,
 };
 
+#define RK3368_GRF_SOC_CON15   0x043c
+#define RK3368_GRF_SOC_CON16   0x0440
+
+/* RK3368_GRF_SOC_CON15 */
+#define RK3368_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \
+GRF_CLR_BIT(11))
+#define RK3368_GMAC_PHY_INTF_SEL_RMII  (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \
+GRF_BIT(11))
+#define RK3368_GMAC_FLOW_CTRL  GRF_BIT(8)
+#define RK3368_GMAC_FLOW_CTRL_CLR  GRF_CLR_BIT(8)
+#define RK3368_GMAC_SPEED_10M  GRF_CLR_BIT(7)
+#define RK3368_GMAC_SPEED_100M GRF_BIT(7)
+#define RK3368_GMAC_RMII_CLK_25M   GRF_BIT(3)
+#define RK3368_GMAC_RMII_CLK_2_5M  GRF_CLR_BIT(3)
+#define RK3368_GMAC_CLK_125M   (GRF_CLR_BIT(4) | GRF_CLR_BIT(5))
+#define RK3368_GMAC_CLK_25M(GRF_BIT(4) | GRF_BIT(5))
+#define RK3368_GMAC_CLK_2_5M   (GRF_CLR_BIT(4) | GRF_BIT(5))
+#define RK3368_GMAC_RMII_MODE  GRF_BIT(6)
+#define RK3368_GMAC_RMII_MODE_CLR  GRF_CLR_BIT(6)
+
+/* RK3368_GRF_SOC_CON16 */
+#define RK3368_GMAC_TXCLK_DLY_ENABLE   GRF_BIT(7)
+#define RK3368_GMAC_TXCLK_DLY_DISABLE  GRF_CLR_BIT(7)
+#define RK3368_GMAC_RXCLK_DLY_ENABLE   GRF_BIT(15)
+#define RK3368_GMAC_RXCLK_DLY_DISABLE  GRF_CLR_BIT(15)
+#define RK3368_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3368_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3368_set_to_rgmii(struct rk_priv_data *bsp_priv,
+   int tx_delay, int rx_delay)
+{
+   struct device *dev = bsp_priv-pdev-dev;
+
+   if (IS_ERR(bsp_priv-grf)) {
+   dev_err(dev, %s: Missing rockchip,grf property\n, __func__);
+   return;
+   }
+
+   regmap_write(bsp_priv-grf, RK3368_GRF_SOC_CON15,
+RK3368_GMAC_PHY_INTF_SEL_RGMII |
+RK3368_GMAC_RMII_MODE_CLR);
+   regmap_write(bsp_priv-grf, RK3368_GRF_SOC_CON16,
+RK3368_GMAC_RXCLK_DLY_ENABLE |
+RK3368_GMAC_TXCLK_DLY_ENABLE |
+RK3368_GMAC_CLK_RX_DL_CFG(rx_delay) |
+RK3368_GMAC_CLK_TX_DL_CFG(tx_delay));
+}
+
+static void rk3368_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+   struct device *dev = bsp_priv-pdev-dev;
+
+   if (IS_ERR(bsp_priv-grf)) {
+   dev_err(dev, %s: Missing rockchip,grf property\n, __func__);
+   return;
+   }
+
+   regmap_write(bsp_priv-grf, RK3368_GRF_SOC_CON15,
+RK3368_GMAC_PHY_INTF_SEL_RMII | RK3368_GMAC_RMII_MODE);
+}
+
+static void rk3368_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+   struct device *dev = bsp_priv-pdev-dev;
+
+   if (IS_ERR(bsp_priv-grf)) {
+   dev_err(dev, %s: Missing rockchip,grf property\n, __func__);
+   return;
+   }
+
+   if (speed == 10)
+   regmap_write(bsp_priv-grf, RK3368_GRF_SOC_CON15,
+RK3368_GMAC_CLK_2_5M);
+   else if (speed == 100)
+   regmap_write(bsp_priv-grf, RK3368_GRF_SOC_CON15,
+RK3368_GMAC_CLK_25M);
+   else if (speed == 1000)
+   regmap_write(bsp_priv-grf, RK3368_GRF_SOC_CON15,
+RK3368_GMAC_CLK_125M);
+   else
+   dev_err(dev, unknown speed value for RGMII! speed=%d, speed);
+}
+
+static void rk3368_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+   struct device *dev = bsp_priv-pdev-dev;
+

Re: [PATCH] tipc:Make the function tipc_buf_append have a return type of bool

2015-06-17 Thread Ying Xue
On 06/18/2015 10:44 AM, Nicholas Krause wrote:
 This converts the function tipc_buf_append now due to this
 particular function only returning either one or zero as
 its return value.
 
 Signed-off-by: Nicholas Krause xerofo...@gmail.com

Acked-by: Ying Xue ying@windriver.com

 ---
  net/tipc/msg.c | 12 ++--
  net/tipc/msg.h |  2 +-
  2 files changed, 7 insertions(+), 7 deletions(-)
 
 diff --git a/net/tipc/msg.c b/net/tipc/msg.c
 index c3e96e8..52f2978 100644
 --- a/net/tipc/msg.c
 +++ b/net/tipc/msg.c
 @@ -115,9 +115,9 @@ struct sk_buff *tipc_msg_create(uint user, uint type,
   *out: set when successful non-complete reassembly, otherwise 
 NULL
   * @*buf: in:  the buffer to append. Always defined
   *out: head buf after successful complete reassembly, otherwise 
 NULL
 - * Returns 1 when reassembly complete, otherwise 0
 + * Returns true when reassembly complete, otherwise false
   */
 -int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
 +bool tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
  {
   struct sk_buff *head = *headbuf;
   struct sk_buff *frag = *buf;
 @@ -144,7 +144,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct 
 sk_buff **buf)
   skb_frag_list_init(head);
   TIPC_SKB_CB(head)-tail = NULL;
   *buf = NULL;
 - return 0;
 + return false;
   }
  
   if (!head)
 @@ -171,16 +171,16 @@ int tipc_buf_append(struct sk_buff **headbuf, struct 
 sk_buff **buf)
   *buf = head;
   TIPC_SKB_CB(head)-tail = NULL;
   *headbuf = NULL;
 - return 1;
 + return true;
   }
   *buf = NULL;
 - return 0;
 + return false;
  err:
   pr_warn_ratelimited(Unable to build fragment list\n);
   kfree_skb(*buf);
   kfree_skb(*headbuf);
   *buf = *headbuf = NULL;
 - return 0;
 + return false;
  }
  
  /* tipc_msg_validate - validate basic format of received message
 diff --git a/net/tipc/msg.h b/net/tipc/msg.h
 index e1d3595e..00d3357 100644
 --- a/net/tipc/msg.h
 +++ b/net/tipc/msg.h
 @@ -771,7 +771,7 @@ void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 
 user, u32 type,
  struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
   uint data_sz, u32 dnode, u32 onode,
   u32 dport, u32 oport, int errcode);
 -int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
 +bool tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf);
  bool tipc_msg_bundle(struct sk_buff *bskb, struct sk_buff *skb, u32 mtu);
  
  bool tipc_msg_make_bundle(struct sk_buff **skb, u32 mtu, u32 dnode);
 

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net 2/2] bridge: multicast: start querier timer when running user-space stp

2015-06-17 Thread Herbert Xu
On Wed, Jun 17, 2015 at 04:28:31AM -0700, Nikolay Aleksandrov wrote:
 From: Satish Ashok sas...@cumulusnetworks.com
 
 When STP is running in user-space and querier is configured, the
 querier timer is not started when a port goes to forwarding state.
 
 Signed-off-by: Satish Ashok sas...@cumulusnetworks.com
 Signed-off-by: Nikolay Aleksandrov niko...@cumulusnetworks.com
 Fixes: eb1d16414339 (bridge: Add core IGMP snooping support)
 ---
  net/bridge/br_stp.c | 3 +++
  1 file changed, 3 insertions(+)
 
 diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
 index fb3ebe615513..1e2f2f1ff6b0 100644
 --- a/net/bridge/br_stp.c
 +++ b/net/bridge/br_stp.c
 @@ -456,6 +456,9 @@ void br_port_state_selection(struct net_bridge *br)
   p-topology_change_ack = 0;
   br_make_blocking(p);
   }
 + } else if (br-stp_enabled == BR_USER_STP 
 +p-state == BR_STATE_FORWARDING) {
 + br_multicast_enable_port(p);
   }

Minor nit, the stp_enabled check appears to be redundant since
you're in the else clause.

More importantly, I'm not sure about the logic.  For kernel STP,
we enable the port as soon as we get out of blocking.  IIRC enabling
the port just means that we start tracking subscriptions/queries
so it should be OK to do even while we're listening/learning.

In any case the logic should be identical whether we use kernel
STP or user-space STP.

So how about removing br_multicast_enable_port from br_make_forward
and just add it here for both kernel and user-space STP?

Thanks,
-- 
Email: Herbert Xu herb...@gondor.apana.org.au
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


pull-request: wireless-drivers-next 2015-06-18

2015-06-17 Thread Kalle Valo
Hi Dave,

here's one more pull request I would like to get to 4.2 if possible.
Nothing major this time, just small stuff all over. Please let me know
if you have any problems.

Kalle

The following changes since commit c39c4c6abb89d24454b63798ccbae12b538206a5:

  tcp: double default TSQ output bytes limit (2015-06-04 01:09:36 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git 
tags/wireless-drivers-next-for-davem-2015-06-18

for you to fetch changes up to 40b503c76481aecf811a29a14a80c13b429b5e14:

  brcmfmac: make brcmf_p2p_detach() call conditional (2015-06-16 11:35:10 +0300)


Major changes:

mwifiex:

* enhancements for AP mode: support verbose information in station
  dump command and also information about AP link.
* enable power save by default

brcmfmac:

* fix module reload issue for PCIe
* improving msgbuf protocol for PCIe devices
* rework .get_station() cfg80211 callback operation
* determine interface combinations upon device feature support

ath9k:

* ath9k_htc: add support of channel switch

wil6210:

* add modparam for bcast ring size
* support hidden SSID
* add per-MCS Rx stats


Amitkumar Karwar (1):
  mwifiex: add missing break statement in switch case

Arend van Spriel (10):
  brcmfmac: remove chipinfo debugfs entry
  brcmfmac: remove watchdog reset from brcmf_pcie_buscoreprep()
  brcmfmac: use debugfs_create_devm_seqfile() helper function
  brcmfmac: rework .get_station() callback
  brcmfmac: have sdio return -EIO when device communication is not possible
  brcmfmac: free ifp for non-netdev interface in p2p module
  brcmfmac: move p2p attach/detach functions
  brcmfmac: assure p2pdev is unregistered upon driver unload
  brcmfmac: fix double free of p2pdev interface
  brcmfmac: make brcmf_p2p_detach() call conditional

Avinash Patil (12):
  mwifiex: verbose logging for association failure messages
  mwifiex: correct bss_type assignment
  mwifiex: support AP reset after bss_stop
  mwifiex: enable 11d after bss reset
  mwifiex: reset 11h active flag when chandef does not require dfs
  mwifiex: disable CAC upon radar detection event
  mwifiex: parse power constraint IE from Tail
  mwifiex: support downloading IEs from tail
  mwifiex: drop block-ack action frames
  mwifiex: advertise PS ON by default support to cfg80211
  mwifiex: update AP WMM settings from BSS_START event
  mwifiex: update current config_band info in start_ap

Brent Taylor (1):
  ath6kl: Fix multiple clients associating in AP mode

Chun-Yeow Yeoh (1):
  ath9k_htc: add support of channel switch

Chunfan Chen (1):
  mwifiex: handle BT coex event to adjust Rx BA window size

Felix Fietkau (1):
  ath9k: fix DMA stop sequence for AR9003+

Hamad Kadmany (1):
  wil6210: Support hidden SSID

Hans Ulli Kroll (1):
  rtlwifi: fix tm_trigger usage

Hante Meuleman (1):
  brcmfmac: Update msgbuf read pointer quicker.

Jakub Kicinski (5):
  mt7601u: unify paged and non-paged RX dma paths
  mt7601u: watch out for invalid-length frames
  mt7601u: don't cleanup device second time after .resume()
  mt7601u: set promiscous mode based on FIF_OTHER_BSS
  mt7601u: don't warn about devices without per-rate power table

Julia Lawall (1):
  wl1251: drop unneeded goto

Kalle Valo (1):
  Merge ath-next from ath.git

Michal Kazior (9):
  ath10k: move cycle_count macro
  ath10k: handle cycle counter wraparound
  ath10k: fix inconsistent survey reports
  ath10k: add missing firmware declarations
  ath10k: fix possible ps sleep crash
  ath10k: fix ar->rx_channel updating logic
  ath10k: remove ath10k_chanctx struct
  ath10k: fix channel switching
  ath10k: prevent memory leak in wmi rx ops

Pontus Fuchs (2):
  brcmfmac: Check if firmware supports p2p
  brcmfmac: Build wiphy mode and interface combinations dynamically

Rafał Miłecki (6):
  brcmfmac: support NVRAMs containing pci devpaths (instead of pcie)
  brcmfmac: set wiphy perm_addr to hardware MAC address
  brcmfmac: use direct data pointer in NVRAM parser struct
  b43: fix support for 14e4:4321 PCI dev with BCM4321 chipset
  bcma: make calls to PCI hostmode functions config-safe
  bcma: lower dependency of BCMA_DRIVER_PCI_HOSTMODE

Raja Mani (2):
  ath10k: free wmi mgmt event skb when parsing fails
  ath10k: remove unused variable 'id' in ath10k_pci_tx_pipe_cleanup()

Rajkumar Manoharan (1):
  ath10k: bypass PLL setting on target init for QCA9888

Stanislaw Gruszka (2):
  MAINTAINERS: remove rt2x00.serialmonkey.com list and web page
  rt2800: fix assigning same WCID for different stations

Taehee Yoo (8):
  rtlwifi: rtl8192cu: Fix performance issue.
  

[PATCH net] Revert "tcp: switch tcp_fastopen key generation to net_get_random_once"

2015-06-17 Thread Christoph Paasch
This reverts commit 222e83d2e0aecb6a5e8d42b1a8d51332a1eba960.

tcp_fastopen_reset_cipher really cannot be called from interrupt
context. It allocates the tcp_fastopen_context with GFP_KERNEL and
calls crypto_alloc_cipher, which allocates all kinds of stuff with
GFP_KERNEL.

Thus, we might sleep when the key-generation is triggered by an
incoming TFO cookie-request which would then happen in interrupt-
context, as shown by enabling CONFIG_DEBUG_ATOMIC_SLEEP:

[   36.001813] BUG: sleeping function called from invalid context at 
mm/slub.c:1266
[   36.003624] in_atomic(): 1, irqs_disabled(): 0, pid: 1016, name: packetdrill
[   36.004859] CPU: 1 PID: 1016 Comm: packetdrill Not tainted 4.1.0-rc7 #14
[   36.006085] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
[   36.008250]  04f2 88007f8838a8 8171d53a 
880075a084a8
[   36.009630]  880075a08000 88007f8838c8 810967d3 
88007f883928
[   36.011076]   88007f8838f8 81096892 
88007f89be00
[   36.012494] Call Trace:
[   36.012953]  IRQ  [8171d53a] dump_stack+0x4f/0x6d
[   36.014085]  [810967d3] ___might_sleep+0x103/0x170
[   36.015117]  [81096892] __might_sleep+0x52/0x90
[   36.016117]  [8118e887] kmem_cache_alloc_trace+0x47/0x190
[   36.017266]  [81680d82] ? tcp_fastopen_reset_cipher+0x42/0x130
[   36.018485]  [81680d82] tcp_fastopen_reset_cipher+0x42/0x130
[   36.019679]  [81680f01] tcp_fastopen_init_key_once+0x61/0x70
[   36.020884]  [81680f2c] __tcp_fastopen_cookie_gen+0x1c/0x60
[   36.022058]  [816814ff] tcp_try_fastopen+0x58f/0x730
[   36.023118]  [81671788] tcp_conn_request+0x3e8/0x7b0
[   36.024185]  [810e3872] ? __module_text_address+0x12/0x60
[   36.025327]  [8167b2e1] tcp_v4_conn_request+0x51/0x60
[   36.026410]  [816727e0] tcp_rcv_state_process+0x190/0xda0
[   36.027556]  [81661f97] ? __inet_lookup_established+0x47/0x170
[   36.028784]  [8167c2ad] tcp_v4_do_rcv+0x16d/0x3d0
[   36.029832]  [812e6806] ? security_sock_rcv_skb+0x16/0x20
[   36.030936]  [8167cc8a] tcp_v4_rcv+0x77a/0x7b0
[   36.031875]  [816af8c3] ? iptable_filter_hook+0x33/0x70
[   36.032953]  [81657d22] ip_local_deliver_finish+0x92/0x1f0
[   36.034065]  [81657f1a] ip_local_deliver+0x9a/0xb0
[   36.035069]  [81657c90] ? ip_rcv+0x3d0/0x3d0
[   36.035963]  [81657569] ip_rcv_finish+0x119/0x330
[   36.036950]  [81657ba7] ip_rcv+0x2e7/0x3d0
[   36.037847]  [81610652] __netif_receive_skb_core+0x552/0x930
[   36.038994]  [81610a57] __netif_receive_skb+0x27/0x70
[   36.040033]  [81610b72] process_backlog+0xd2/0x1f0
[   36.041025]  [81611482] net_rx_action+0x122/0x310
[   36.042007]  [81076743] __do_softirq+0x103/0x2f0
[   36.042978]  [81723e3c] do_softirq_own_stack+0x1c/0x30

There does not seem to be a better way to handle this. We could try
to make the call to kmalloc and crypto_alloc_cipher during bootup, and
then generate the random value only on-the-fly (when the first TFO-SYN
comes in) with net_get_random_once in order to have the better entropy
that comes with doing the late initialisation of the random value. But
that's probably net-next material.

Cc: Hannes Frederic Sowa han...@stressinduktion.org
Fixes: 222e83d2e0ae ("tcp: switch tcp_fastopen key generation to net_get_random_once")
Signed-off-by: Christoph Paasch cpaa...@apple.com
---
 include/net/tcp.h  |  1 -
 net/ipv4/sysctl_net_ipv4.c |  5 -
 net/ipv4/tcp_fastopen.c| 27 +++
 3 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6d204f3f9df8..f27a4e6bae11 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1359,7 +1359,6 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff 
*skb,
  struct request_sock *req,
  struct tcp_fastopen_cookie *foc,
  struct dst_entry *dst);
-void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
 /* Fastopen key context */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c3852a7ff3c7..46d6b3817b41 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -231,11 +231,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, 
int write,
ret = -EINVAL;
goto bad_key;
}
-   /* Generate a dummy secret but don't publish it. This
-* is needed so we don't regenerate a new key on the
-* first invocation of tcp_fastopen_cookie_gen
-*/
-   tcp_fastopen_init_key_once(false);
tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
}
 

[PATCH 05/22] fjes: ES information acquisition routine

2015-06-17 Thread Taku Izumi
This patch adds the ES information acquisition routine.
ES information can be retrieved by issuing an information
request command. The ES information indicates which
receivers are in the same zone.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_hw.c   | 101 ++
 drivers/platform/x86/fjes/fjes_hw.h   |  24 
 drivers/platform/x86/fjes/fjes_regs.h |  23 
 3 files changed, 148 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index d1f090a..eb04d9a 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -360,6 +360,107 @@ void fjes_hw_exit(struct fjes_hw *hw)
fjes_hw_cleanup(hw);
 }
 
+static enum fjes_dev_command_response_e fjes_hw_issue_request_command(
+   struct fjes_hw *hw, enum fjes_dev_command_request_type type)
+{
+   union REG_CR cr;
+   union REG_CS cs;
+   enum fjes_dev_command_response_e ret = FJES_CMD_STATUS_UNKNOWN;
+   int timeout;
+
+   cr.Reg = 0;
+   cr.Bits.req_start = 1;
+   cr.Bits.req_code = type;
+   wr32(XSCT_CR, cr.Reg);
+   cr.Reg = rd32(XSCT_CR);
+
+   if (cr.Bits.error == 0) {
+   timeout = FJES_COMMAND_REQ_TIMEOUT * 1000;
+   cs.Reg = rd32(XSCT_CS);
+
+   while ((cs.Bits.complete != 1)  timeout  0) {
+   msleep(1000);
+   cs.Reg = rd32(XSCT_CS);
+   timeout -= 1000;
+   }
+
+   if (cs.Bits.complete == 1)
+   ret = FJES_CMD_STATUS_NORMAL;
+   else if (timeout = 0)
+   ret = FJES_CMD_STATUS_TIMEOUT;
+
+   } else {
+   switch (cr.Bits.err_info) {
+   case FJES_CMD_REQ_ERR_INFO_PARAM:
+   ret = FJES_CMD_STATUS_ERROR_PARAM;
+   break;
+   case FJES_CMD_REQ_ERR_INFO_STATUS:
+   ret = FJES_CMD_STATUS_ERROR_STATUS;
+   break;
+   default:
+   ret = FJES_CMD_STATUS_UNKNOWN;
+   break;
+   }
+   }
+
+   return ret;
+}
+
+int fjes_hw_request_info(struct fjes_hw *hw)
+{
+   union fjes_device_command_req *req_buf = hw-hw_info.req_buf;
+   union fjes_device_command_res *res_buf = hw-hw_info.res_buf;
+   enum fjes_dev_command_response_e ret;
+   int result;
+
+   memset(req_buf, 0, hw-hw_info.req_buf_size);
+   memset(res_buf, 0, hw-hw_info.res_buf_size);
+
+   req_buf-info.length = FJES_DEV_COMMAND_INFO_REQ_LEN;
+
+   res_buf-info.length = 0;
+   res_buf-info.code = 0;
+
+   ret = fjes_hw_issue_request_command(hw, FJES_CMD_REQ_INFO);
+
+   result = 0;
+
+   if (FJES_DEV_COMMAND_INFO_RES_LEN((*(hw-hw_info.max_epid))) !=
+   res_buf-info.length) {
+   result = -ENOMSG;
+   } else if (ret == FJES_CMD_STATUS_NORMAL) {
+
+   switch (res_buf-info.code) {
+   case FJES_CMD_REQ_RES_CODE_NORMAL:
+   result = 0;
+   break;
+   default:
+   result = -EPERM;
+   break;
+   }
+   } else {
+   switch (ret) {
+   case FJES_CMD_STATUS_UNKNOWN:
+   result = -EPERM;
+   break;
+   case FJES_CMD_STATUS_TIMEOUT:
+   result = -EBUSY;
+   break;
+   case FJES_CMD_STATUS_ERROR_PARAM:
+   result = -EPERM;
+   break;
+   case FJES_CMD_STATUS_ERROR_STATUS:
+   result = -EPERM;
+   break;
+   default:
+   result = -EPERM;
+   break;
+   }
+   }
+
+   return result;
+}
+
 void fjes_hw_set_irqmask(struct fjes_hw *hw, enum REG_ICTL_MASK intr_mask,
bool mask)
 {
diff --git a/drivers/platform/x86/fjes/fjes_hw.h 
b/drivers/platform/x86/fjes/fjes_hw.h
index 0bb2d51..7861fe3 100644
--- a/drivers/platform/x86/fjes/fjes_hw.h
+++ b/drivers/platform/x86/fjes/fjes_hw.h
@@ -33,6 +33,12 @@ struct fjes_hw;
 #define EP_BUFFER_INFO_SIZE 4096
 
 #define FJES_DEVICE_RESET_TIMEOUT  ((17 + 1) * 3) /* sec */
+#define FJES_COMMAND_REQ_TIMEOUT  (5 + 1) /* sec */
+
+#define FJES_CMD_REQ_ERR_INFO_PARAM  (0x0001)
+#define FJES_CMD_REQ_ERR_INFO_STATUS (0x0002)
+
+#define FJES_CMD_REQ_RES_CODE_NORMAL (0)
 
 #define EP_BUFFER_SIZE \
(((sizeof(union ep_buffer_info) + (128 * (64 * 1024))) \
@@ -49,6 +55,7 @@ struct fjes_hw;
((size) - sizeof(struct esmem_frame_t) - \
(ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
 
+#define FJES_DEV_COMMAND_INFO_REQ_LEN  (4)
 #define FJES_DEV_COMMAND_INFO_RES_LEN(epnum) (8 + 2*(epnum))
 #define FJES_DEV_COMMAND_SHARE_BUFFER_REQ_LEN(txb, 

[PATCH 17/22] fjes: force_close_task

2015-06-17 Thread Taku Izumi
This patch adds force_close_task.
This task is used to forcibly close the network device.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  |  1 +
 drivers/platform/x86/fjes/fjes_main.c | 13 +
 2 files changed, 14 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index 1a880a0..e50fd01 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -49,6 +49,7 @@ struct fjes_adapter {
unsigned long rx_last_jiffies;
bool unset_rx_last;
 
+   struct work_struct force_close_task;
bool force_reset;
bool open_guard;
 
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 8e78014..3931bcc 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -56,6 +56,7 @@ static netdev_tx_t fjes_xmit_frame(struct sk_buff *,
struct net_device *);
 static void fjes_raise_intr_rxdata_task(struct work_struct *);
 static void fjes_tx_stall_task(struct work_struct *);
+static void fjes_force_close_task(struct work_struct *);
 static irqreturn_t fjes_intr(int, void*);
 static struct rtnl_link_stats64
 *fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *);
@@ -509,6 +510,17 @@ static void fjes_tx_stall_task(struct work_struct *work)
queue_work(adapter-txrx_wq, adapter-tx_stall_task);
 }
 
+static void fjes_force_close_task(struct work_struct *work)
+{
+   struct fjes_adapter *adapter = container_of(work,
+   struct fjes_adapter, force_close_task);
+   struct net_device *netdev = adapter-netdev;
+
+   rtnl_lock();
+   dev_close(netdev);
+   rtnl_unlock();
+}
+
 static void fjes_raise_intr_rxdata_task(struct work_struct *work)
 {
struct fjes_adapter *adapter = container_of(work,
@@ -1053,6 +1065,7 @@ static int fjes_probe(struct platform_device *plat_dev)
if (err)
goto err_sw_init;
 
+   INIT_WORK(adapter-force_close_task, fjes_force_close_task);
adapter-force_reset = false;
adapter-open_guard = false;
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 22/22] fjes: ethtool support

2015-06-17 Thread Taku Izumi
This patch adds implementation for ethtool support.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/Makefile   |   2 +-
 drivers/platform/x86/fjes/fjes.h |   2 +
 drivers/platform/x86/fjes/fjes_ethtool.c | 135 +++
 drivers/platform/x86/fjes/fjes_main.c|   1 +
 4 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/x86/fjes/fjes_ethtool.c

diff --git a/drivers/platform/x86/fjes/Makefile 
b/drivers/platform/x86/fjes/Makefile
index a67f65d8..8ca4de0 100644
--- a/drivers/platform/x86/fjes/Makefile
+++ b/drivers/platform/x86/fjes/Makefile
@@ -27,5 +27,5 @@
 
 obj-$(CONFIG_FUJITSU_ES) += fjes.o
 
-fjes-objs := fjes_main.o fjes_hw.o
+fjes-objs := fjes_main.o fjes_hw.o fjes_ethtool.o
 
diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index 2e814bb..23ac394 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -74,4 +74,6 @@ extern char fjes_driver_name[];
 extern char fjes_driver_version[];
 extern u32 fjes_support_mtu[];
 
+void fjes_set_ethtool_ops(struct net_device *);
+
 #endif /* FJES_H_ */
diff --git a/drivers/platform/x86/fjes/fjes_ethtool.c 
b/drivers/platform/x86/fjes/fjes_ethtool.c
new file mode 100644
index 000..49974d2
--- /dev/null
+++ b/drivers/platform/x86/fjes/fjes_ethtool.c
@@ -0,0 +1,135 @@
+/*
+ *  FUJITSU Extended Socket Network Device driver
+ *  Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see http://www.gnu.org/licenses/.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called COPYING.
+ *
+ */
+
+/* ethtool support for fjes */
+
+#include linux/vmalloc.h
+#include linux/netdevice.h
+#include linux/ethtool.h
+
+#include fjes.h
+
+
+struct fjes_stats {
+   char stat_string[ETH_GSTRING_LEN];
+   int sizeof_stat;
+   int stat_offset;
+};
+
+#define FJES_STAT(name, stat) { \
+   .stat_string = name, \
+   .sizeof_stat = FIELD_SIZEOF(struct fjes_adapter, stat), \
+   .stat_offset = offsetof(struct fjes_adapter, stat) \
+}
+
+
+static const struct fjes_stats fjes_gstrings_stats[] = {
+   FJES_STAT(rx_packets, stats64.rx_packets),
+   FJES_STAT(tx_packets, stats64.tx_packets),
+   FJES_STAT(rx_bytes, stats64.rx_bytes),
+   FJES_STAT(tx_bytes, stats64.rx_bytes),
+   FJES_STAT(rx_dropped, stats64.rx_dropped),
+   FJES_STAT(tx_dropped, stats64.tx_dropped),
+};
+
+static void fjes_get_ethtool_stats(struct net_device *netdev,
+   struct ethtool_stats *stats, u64 *data)
+{
+   struct fjes_adapter *adapter = netdev_priv(netdev);
+   int i;
+   char *p = NULL;
+
+   for (i = 0; i  ARRAY_SIZE(fjes_gstrings_stats); i++) {
+   p = (char *)adapter + fjes_gstrings_stats[i].stat_offset;
+   data[i] = (fjes_gstrings_stats[i].sizeof_stat == sizeof(u64))
+   ? *(u64 *)p : *(u32 *)p;
+   }
+}
+
+static void fjes_get_strings(struct net_device *netdev,
+   u32 stringset, u8 *data)
+{
+   u8 *p = data;
+   int i;
+
+   switch (stringset) {
+   case ETH_SS_STATS:
+   for (i = 0; i  ARRAY_SIZE(fjes_gstrings_stats); i++) {
+   memcpy(p, fjes_gstrings_stats[i].stat_string,
+   ETH_GSTRING_LEN);
+   p += ETH_GSTRING_LEN;
+   }
+   break;
+   }
+}
+
+
+static int fjes_get_sset_count(struct net_device *netdev, int sset)
+{
+   switch (sset) {
+   case ETH_SS_STATS:
+   return ARRAY_SIZE(fjes_gstrings_stats);
+   default:
+   return -EOPNOTSUPP;
+   }
+}
+
+static void fjes_get_drvinfo(struct net_device *netdev,
+   struct ethtool_drvinfo *drvinfo)
+{
+   strlcpy(drvinfo-driver, fjes_driver_name, sizeof(drvinfo-driver));
+   strlcpy(drvinfo-version, fjes_driver_version,
+   sizeof(drvinfo-version));
+
+   strlcpy(drvinfo-fw_version, none, sizeof(drvinfo-fw_version));
+   strlcpy(drvinfo-bus_info, none, sizeof(drvinfo-bus_info));
+   drvinfo-regdump_len = 0;
+   drvinfo-eedump_len = 0;
+
+}
+
+
+static int fjes_get_settings(struct net_device *netdev,
+   struct ethtool_cmd *ecmd)
+{
+   ecmd-supported = 0;
+   ecmd-advertising = 0;
+   ecmd-duplex = DUPLEX_FULL;
+   ecmd-autoneg 

[PATCH 19/22] fjes: update_zone_task

2015-06-17 Thread Taku Izumi
This patch adds update_zone_task.

Zoning information can be changed by the user.
This task is used to monitor whether the zoning information
has changed.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_hw.c   | 183 ++
 drivers/platform/x86/fjes/fjes_hw.h   |   1 +
 drivers/platform/x86/fjes/fjes_main.c |  14 +++
 3 files changed, 198 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index 85f9693..e07b266 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -22,6 +22,8 @@
 #include fjes_hw.h
 #include fjes.h
 
+static void fjes_hw_update_zone_task(struct work_struct *);
+
 /* supported MTU list */
 u32 fjes_support_mtu[] = {
FJES_MTU_DEFINE(8 * 1024),
@@ -331,6 +333,8 @@ int fjes_hw_init(struct fjes_hw *hw)
 
fjes_hw_set_irqmask(hw, REG_ICTL_MASK_ALL, true);
 
+   INIT_WORK(hw-update_zone_task, fjes_hw_update_zone_task);
+
mutex_init(hw-hw_info.lock);
 
hw-max_epid = fjes_hw_get_max_epid(hw);
@@ -358,6 +362,8 @@ void fjes_hw_exit(struct fjes_hw *hw)
}
 
fjes_hw_cleanup(hw);
+
+   cancel_work_sync(hw-update_zone_task);
 }
 
 static enum fjes_dev_command_response_e fjes_hw_issue_request_command(
@@ -940,3 +946,180 @@ int fjes_hw_epbuf_tx_pkt_send(struct epbuf_handler *epbh,
return 0;
 }
 
+static void fjes_hw_update_zone_task(struct work_struct *work)
+{
+
+   struct fjes_hw *hw = container_of(work,
+   struct fjes_hw, update_zone_task);
+   struct fjes_adapter *adapter = (struct fjes_adapter *)hw-back;
+   struct net_device *netdev = adapter-netdev;
+   int ret;
+   int epidx;
+   enum ep_partner_status pstatus;
+   unsigned long share_bit = 0;
+   unsigned long unshare_bit = 0;
+   unsigned long irq_bit = 0;
+   bool update = false;
+   union fjes_device_command_res *res_buf =
+   hw-hw_info.res_buf;
+
+   mutex_lock(hw-hw_info.lock);
+
+   ret = fjes_hw_request_info(hw);
+   switch (ret) {
+   case -ENOMSG:
+   case -EBUSY:
+   default:
+   if (!work_pending(adapter-force_close_task)) {
+   adapter-force_reset = true;
+   schedule_work(adapter-force_close_task);
+   }
+   break;
+
+   case 0:
+
+   for (epidx = 0; epidx  hw-max_epid; epidx++) {
+   if (epidx != hw-my_epid) {
+
+   pstatus = fjes_hw_get_partner_ep_status(hw, 
epidx);
+   switch (pstatus) {
+   case EP_PARTNER_UNSHARE:
+   default:
+   if ((res_buf-info.info[epidx].zone !=
+   FJES_ZONING_ZONE_TYPE_NONE) 
+   
(res_buf-info.info[epidx].es_status ==
+   FJES_ZONING_STATUS_ENABLE) 
+   (res_buf-info.info[epidx].zone ==
+   
res_buf-info.info[hw-my_epid].zone))
+   set_bit(epidx, share_bit);
+   else
+   set_bit(epidx, unshare_bit);
+   break;
+
+   case EP_PARTNER_COMPLETE:
+   case EP_PARTNER_WAITING:
+   if ((res_buf-info.info[epidx].zone ==
+   FJES_ZONING_ZONE_TYPE_NONE) ||
+   
(res_buf-info.info[epidx].es_status !=
+   FJES_ZONING_STATUS_ENABLE) ||
+   (res_buf-info.info[epidx].zone !=
+   res_buf-info.
+ info[hw-my_epid].zone)) {
+
+   set_bit(epidx,
+ 
adapter-unshare_watch_bitmask);
+   set_bit(epidx,
+ 
hw-hw_info.buffer_unshare_reserve_bit);
+   }
+   break;
+
+   case EP_PARTNER_SHARED:
+   if ((res_buf-info.info[epidx].zone ==
+   FJES_ZONING_ZONE_TYPE_NONE) ||
+   
(res_buf-info.info[epidx].es_status !=
+   FJES_ZONING_STATUS_ENABLE) ||
+   

Re: [PATCH 19/22] fjes: update_zone_task

2015-06-17 Thread Joe Perches
On Thu, 2015-06-18 at 09:49 +0900, Taku Izumi wrote:
 This patch adds update_zone_task.

 diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
 b/drivers/platform/x86/fjes/fjes_hw.c
[]
 +static void fjes_hw_update_zone_task(struct work_struct *work)
 +{

Some of the line length can be removed here by using a
temporary, but these would look a lot better if you went
beyond 80 columns.

info = res_buf-info.info;
[]
 + case EP_PARTNER_UNSHARE:
 + default:
 + if ((res_buf-info.info[epidx].zone !=
 + FJES_ZONING_ZONE_TYPE_NONE) 
 + 
 (res_buf-info.info[epidx].es_status ==
 + FJES_ZONING_STATUS_ENABLE) 
 + (res_buf-info.info[epidx].zone ==
 + 
 res_buf-info.info[hw-my_epid].zone))

So these become
if ((info[epidx].zone != 
FJES_ZONING_ZONE_TYPE_NONE) 
(info[epidx].es_status == 
FJES_ZONING_STATUS_ENABLE) 
(info[epidx].zone == 
info[hw-my_epid.zone))

 + case EP_PARTNER_COMPLETE:
 + case EP_PARTNER_WAITING:
 + if ((res_buf-info.info[epidx].zone ==
 + FJES_ZONING_ZONE_TYPE_NONE) ||
 + 
 (res_buf-info.info[epidx].es_status !=
 + FJES_ZONING_STATUS_ENABLE) ||
 + (res_buf-info.info[epidx].zone !=
 + res_buf-info.
 +   info[hw-my_epid].zone)) {

etc...


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/22] fjes: unshare_watch_task

2015-06-17 Thread Taku Izumi
This patch adds unshare_watch_task.

A shared buffer's status can change to unshared.
This task is used to monitor the shared buffer's status.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  |   3 +
 drivers/platform/x86/fjes/fjes_main.c | 152 ++
 2 files changed, 155 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index e50fd01..2e814bb 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -61,6 +61,9 @@ struct fjes_adapter {
struct work_struct tx_stall_task;
struct work_struct raise_intr_rxdata_task;
 
+   struct work_struct unshare_watch_task;
+   unsigned long unshare_watch_bitmask;
+
struct delayed_work interrupt_watch_task;
bool interrupt_watch_enable;
 
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 3931bcc..1bba967 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -75,6 +75,7 @@ static int fjes_remove(struct platform_device *);
 static int fjes_sw_init(struct fjes_adapter *);
 static void fjes_netdev_setup(struct net_device *);
 static void fjes_irq_watch_task(struct work_struct *);
+static void fjes_watch_unshare_task(struct work_struct *);
 static void fjes_rx_irq(struct fjes_adapter *, int);
 static int fjes_poll(struct napi_struct *, int);
 
@@ -317,6 +318,8 @@ static int fjes_close(struct net_device *netdev)
fjes_free_irq(adapter);
 
cancel_delayed_work_sync(adapter-interrupt_watch_task);
+   cancel_work_sync(adapter-unshare_watch_task);
+   adapter-unshare_watch_bitmask = 0;
cancel_work_sync(adapter-raise_intr_rxdata_task);
cancel_work_sync(adapter-tx_stall_task);
 
@@ -1075,6 +1078,8 @@ static int fjes_probe(struct platform_device *plat_dev)
INIT_WORK(adapter-tx_stall_task, fjes_tx_stall_task);
INIT_WORK(adapter-raise_intr_rxdata_task,
fjes_raise_intr_rxdata_task);
+   INIT_WORK(adapter-unshare_watch_task, fjes_watch_unshare_task);
+   adapter-unshare_watch_bitmask = 0;
 
INIT_DELAYED_WORK(adapter-interrupt_watch_task, fjes_irq_watch_task);
adapter-interrupt_watch_enable = false;
@@ -1121,7 +1126,9 @@ static int fjes_remove(struct platform_device *plat_dev)
struct fjes_adapter *adapter = netdev_priv(netdev);
struct fjes_hw *hw = adapter-hw;
 
+
cancel_delayed_work_sync(adapter-interrupt_watch_task);
+   cancel_work_sync(adapter-unshare_watch_task);
cancel_work_sync(adapter-raise_intr_rxdata_task);
cancel_work_sync(adapter-tx_stall_task);
if (adapter-control_wq)
@@ -1185,6 +1192,151 @@ static void fjes_irq_watch_task(struct work_struct 
*work)
 
 }
 
+static void fjes_watch_unshare_task(struct work_struct *work)
+{
+   struct fjes_adapter *adapter = container_of(work,
+   struct fjes_adapter, unshare_watch_task);
+   struct fjes_hw *hw = adapter-hw;
+   struct net_device *netdev = adapter-netdev;
+   int epidx;
+   int max_epid, my_epid;
+   unsigned long unshare_watch_bitmask;
+   int wait_time = 0;
+   int is_shared;
+   int ret;
+
+   my_epid = hw-my_epid;
+   max_epid = hw-max_epid;
+
+   unshare_watch_bitmask = adapter-unshare_watch_bitmask;
+   adapter-unshare_watch_bitmask = 0;
+
+   while ((unshare_watch_bitmask || hw-txrx_stop_req_bit) 
+   (wait_time  3000)) {
+
+   for (epidx = 0; epidx  hw-max_epid; epidx++) {
+
+   if (epidx == hw-my_epid)
+   continue;
+
+   if (test_bit(epidx, hw-txrx_stop_req_bit)) {
+
+   is_shared = 
fjes_hw_epid_is_shared(hw-hw_info.share, epidx);
+   if (!is_shared ||
+   (is_shared 
+(FJES_RX_STOP_REQ_DONE 
+ 
hw-ep_shm_info[epidx].rx.info-v1i.rx_status))) {
+
+   mutex_lock(hw-hw_info.lock);
+   ret = fjes_hw_unregister_buff_addr(hw, 
epidx);
+   switch (ret) {
+   case 0:
+   break;
+   case -ENOMSG:
+   case -EBUSY:
+   default:
+
+   if 
(!work_pending(adapter-force_close_task)) {
+   adapter-force_reset = 
true;
+   
schedule_work(adapter-force_close_task);
+   }
+  

[PATCH 21/22] fjes: handle receive cancellation request interrupt

2015-06-17 Thread Taku Izumi
This patch adds the implementation for handling the IRQ
of another receiver's receive cancellation request.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_main.c | 85 +++
 1 file changed, 85 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index f42441f..f33dafb 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -855,6 +855,81 @@ static int fjes_vlan_rx_kill_vid(struct net_device *netdev,
return 0;
 }
 
+static void fjes_txrx_stop_req_irq(struct fjes_adapter *adapter,
+   int src_epid)
+{
+   struct fjes_hw *hw = adapter-hw;
+   enum ep_partner_status status;
+
+   status = fjes_hw_get_partner_ep_status(hw, src_epid);
+   switch (status) {
+   case EP_PARTNER_UNSHARE:
+   default:
+   break;
+   case EP_PARTNER_COMPLETE:
+   break;
+   case EP_PARTNER_WAITING:
+   if (src_epid  hw-my_epid) {
+   hw-ep_shm_info[src_epid].tx.info-v1i.rx_status |=
+   FJES_RX_STOP_REQ_DONE;
+
+   clear_bit(src_epid, hw-txrx_stop_req_bit);
+   set_bit(src_epid, adapter-unshare_watch_bitmask);
+
+   if (!work_pending(adapter-unshare_watch_task))
+   queue_work(adapter-control_wq,
+   adapter-unshare_watch_task);
+   }
+   break;
+   case EP_PARTNER_SHARED:
+   if (hw-ep_shm_info[src_epid].rx.info-v1i.rx_status
+FJES_RX_STOP_REQ_REQUEST) {
+
+   set_bit(src_epid, hw-epstop_req_bit);
+
+   if (!work_pending(hw-epstop_task))
+   queue_work(adapter-control_wq, 
hw-epstop_task);
+
+   }
+   break;
+   }
+}
+
+static void fjes_stop_req_irq(struct fjes_adapter *adapter,
+   int src_epid)
+{
+   struct fjes_hw *hw = adapter-hw;
+   enum ep_partner_status status;
+
+   set_bit(src_epid, hw-hw_info.buffer_unshare_reserve_bit);
+
+   status = fjes_hw_get_partner_ep_status(hw, src_epid);
+   switch (status) {
+   case EP_PARTNER_WAITING:
+   hw-ep_shm_info[src_epid].tx.info-v1i.rx_status |=
+   FJES_RX_STOP_REQ_DONE;
+   clear_bit(src_epid, hw-txrx_stop_req_bit);
+   /* fall through */
+   case EP_PARTNER_UNSHARE:
+   case EP_PARTNER_COMPLETE:
+   default:
+   set_bit(src_epid, adapter-unshare_watch_bitmask);
+   if (!work_pending(adapter-unshare_watch_task))
+   queue_work(adapter-control_wq,
+   adapter-unshare_watch_task);
+
+   break;
+   case EP_PARTNER_SHARED:
+   set_bit(src_epid, hw-epstop_req_bit);
+
+   if (!work_pending(hw-epstop_task))
+   queue_work(adapter-control_wq, hw-epstop_task);
+
+   break;
+   }
+
+}
+
 static void fjes_update_zone_irq(struct fjes_adapter *adapter,
int src_epid)
 {
@@ -878,6 +953,16 @@ static irqreturn_t fjes_intr(int irq, void *data)
if (icr  REG_ICTL_MASK_RX_DATA)
fjes_rx_irq(adapter, icr  REG_IS_MASK_EPID);
 
+   if (icr  REG_ICTL_MASK_DEV_STOP_REQ)
+   fjes_stop_req_irq(adapter, icr  REG_IS_MASK_EPID);
+
+   if (icr  REG_ICTL_MASK_TXRX_STOP_REQ)
+   fjes_txrx_stop_req_irq(adapter, icr  REG_IS_MASK_EPID);
+
+   if (icr  REG_ICTL_MASK_TXRX_STOP_DONE)
+   fjes_hw_set_irqmask(hw,
+   REG_ICTL_MASK_TXRX_STOP_DONE, true);
+
if (icr  REG_ICTL_MASK_INFO_UPDATE)
fjes_update_zone_irq(adapter, icr  REG_IS_MASK_EPID);
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/22] fjes: net_device_ops.ndo_tx_timeout

2015-06-17 Thread Taku Izumi
This patch adds net_device_ops.ndo_tx_timeout callback.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_main.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 72541a7..84727d8 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -60,6 +60,7 @@ static irqreturn_t fjes_intr(int, void*);
 static struct rtnl_link_stats64
 *fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *);
 static int fjes_change_mtu(struct net_device *, int);
+static void fjes_tx_retry(struct net_device *);
 
 static int fjes_acpi_add(struct acpi_device *);
 static int fjes_acpi_remove(struct acpi_device *);
@@ -228,6 +229,7 @@ static const struct net_device_ops fjes_netdev_ops = {
.ndo_start_xmit = fjes_xmit_frame,
.ndo_get_stats64= fjes_get_stats64,
.ndo_change_mtu = fjes_change_mtu,
+   .ndo_tx_timeout = fjes_tx_retry,
 };
 
 /*
@@ -739,6 +741,13 @@ static netdev_tx_t fjes_xmit_frame(struct sk_buff *skb,
return ret;
 }
 
+static void fjes_tx_retry(struct net_device *netdev)
+{
+   struct netdev_queue *curQueue = netdev_get_tx_queue(netdev, 0);
+
+   netif_tx_wake_queue(curQueue);
+}
+
 static struct rtnl_link_stats64
 *fjes_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/22] fjes: Hardware initialization routine

2015-06-17 Thread Taku Izumi
This patch adds the hardware initialization routine, which is
invoked from the driver's .probe routine.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/Makefile|   2 +-
 drivers/platform/x86/fjes/fjes_hw.c   | 305 ++
 drivers/platform/x86/fjes/fjes_hw.h   | 254 
 drivers/platform/x86/fjes/fjes_regs.h | 110 
 4 files changed, 670 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/x86/fjes/fjes_hw.c
 create mode 100644 drivers/platform/x86/fjes/fjes_hw.h
 create mode 100644 drivers/platform/x86/fjes/fjes_regs.h

diff --git a/drivers/platform/x86/fjes/Makefile 
b/drivers/platform/x86/fjes/Makefile
index 98e59cb..a67f65d8 100644
--- a/drivers/platform/x86/fjes/Makefile
+++ b/drivers/platform/x86/fjes/Makefile
@@ -27,5 +27,5 @@
 
 obj-$(CONFIG_FUJITSU_ES) += fjes.o
 
-fjes-objs := fjes_main.o
+fjes-objs := fjes_main.o fjes_hw.o
 
diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
new file mode 100644
index 000..1731827
--- /dev/null
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -0,0 +1,305 @@
+/*
+ *  FUJITSU Extended Socket Network Device driver
+ *  Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see http://www.gnu.org/licenses/.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called COPYING.
+ *
+ */
+
+#include fjes_hw.h
+#include fjes.h
+
+/* supported MTU list */
+u32 fjes_support_mtu[] = {
+   FJES_MTU_DEFINE(8 * 1024),
+   FJES_MTU_DEFINE(16 * 1024),
+   FJES_MTU_DEFINE(32 * 1024),
+   FJES_MTU_DEFINE(64 * 1024),
+   0
+};
+
+u32 fjes_hw_rd32(struct fjes_hw *hw, u32 reg)
+{
+   u8 *base = hw-base;
+   u32 value = 0;
+
+   value = readl(base[reg]);
+
+   return value;
+}
+
+static u8 *fjes_hw_iomap(struct fjes_hw *hw)
+{
+   u8 *base;
+
+   if (!request_mem_region(hw-hw_res.start, hw-hw_res.size,
+   fjes_driver_name)) {
+   pr_err(request_mem_region failed);
+   return NULL;
+   }
+
+   base = (u8 *)ioremap_nocache(hw-hw_res.start, hw-hw_res.size);
+
+   return base;
+}
+
+
+int fjes_hw_reset(struct fjes_hw *hw)
+{
+
+   int timeout;
+   union REG_DCTL dctl;
+
+   dctl.Reg = 0;
+   dctl.Bits.reset = 1;
+   wr32(XSCT_DCTL, dctl.Reg);
+
+
+   timeout = FJES_DEVICE_RESET_TIMEOUT * 1000;
+   dctl.Reg = rd32(XSCT_DCTL);
+   while ((dctl.Bits.reset == 1)  (timeout  0)) {
+   msleep(1000);
+   dctl.Reg = rd32(XSCT_DCTL);
+   timeout -= 1000;
+   }
+
+   return timeout = 0 ? 0 : -EIO;
+
+}
+
+static int fjes_hw_get_max_epid(struct fjes_hw *hw)
+{
+   union REG_MAX_EP info;
+
+   info.Reg = rd32(XSCT_MAX_EP);
+
+   return info.Bits.maxep;
+}
+
+static int fjes_hw_get_my_epid(struct fjes_hw *hw)
+{
+   union REG_OWNER_EPID info;
+
+   info.Reg = rd32(XSCT_OWNER_EPID);
+
+   return info.Bits.epid;
+}
+
+static int fjes_hw_alloc_shared_status_region(struct fjes_hw *hw)
+{
+   size_t size;
+
+   size = sizeof(struct fjes_device_shared_info) +
+   (sizeof(u8) * hw-max_epid);
+   hw-hw_info.share = kzalloc(size, GFP_KERNEL);
+   if (!hw-hw_info.share)
+   return -ENOMEM;
+
+   hw-hw_info.share-epnum = hw-max_epid;
+
+   return 0;
+}
+
+static int fjes_hw_alloc_epbuf(struct epbuf_handler *epbh)
+{
+   void *mem;
+
+   mem = vmalloc(EP_BUFFER_SIZE);
+   if (!mem)
+   return -ENOMEM;
+   memset(mem, 0, EP_BUFFER_SIZE);
+
+   epbh-buffer = mem;
+   epbh-size = EP_BUFFER_SIZE;
+
+   epbh-info = (union ep_buffer_info *)mem;
+   epbh-ring = (u8 *) (mem + sizeof(union ep_buffer_info));
+
+   return 0;
+}
+
+void fjes_hw_setup_epbuf(struct epbuf_handler *epbh, u8 *mac_addr, u32 mtu)
+{
+
+   union ep_buffer_info *info = epbh-info;
+   int i;
+   u16 vlan_id[EP_BUFFER_SUPPORT_VLAN_MAX];
+
+   for (i = 0; i  EP_BUFFER_SUPPORT_VLAN_MAX; i++)
+   vlan_id[i] = info-v1i.vlan_id[i];
+
+   memset((void *)info, 0, sizeof(union ep_buffer_info));
+
+   info-v1i.version = 0;  /* version 0 */
+
+   for (i = 0; i  ETH_ALEN; i++)
+   info-v1i.mac_addr[i] = mac_addr[i];
+
+   info-v1i.head = 0;
+   info-v1i.tail = 1;
+
+   info-v1i.info_size = 

[PATCH 12/22] fjes: net_device_ops.ndo_get_stats64

2015-06-17 Thread Taku Izumi
This patch adds the net_device_ops.ndo_get_stats64 callback.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_main.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 97bf487..eeda824 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -57,6 +57,8 @@ static netdev_tx_t fjes_xmit_frame(struct sk_buff *,
 static void fjes_raise_intr_rxdata_task(struct work_struct *);
 static void fjes_tx_stall_task(struct work_struct *);
 static irqreturn_t fjes_intr(int, void*);
+static struct rtnl_link_stats64
+*fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *);
 
 static int fjes_acpi_add(struct acpi_device *);
 static int fjes_acpi_remove(struct acpi_device *);
@@ -223,6 +225,7 @@ static const struct net_device_ops fjes_netdev_ops = {
.ndo_open   = fjes_open,
.ndo_stop   = fjes_close,
.ndo_start_xmit = fjes_xmit_frame,
+   .ndo_get_stats64= fjes_get_stats64,
 };
 
 /*
@@ -734,6 +737,17 @@ static netdev_tx_t fjes_xmit_frame(struct sk_buff *skb,
return ret;
 }
 
+static struct rtnl_link_stats64
+*fjes_get_stats64(struct net_device *netdev,
+   struct rtnl_link_stats64 *stats)
+{
+   struct fjes_adapter *adapter = netdev_priv(netdev);
+
+   memcpy(stats, adapter-stats64, sizeof(struct rtnl_link_stats64));
+
+   return stats;
+}
+
 static irqreturn_t fjes_intr(int irq, void *data)
 {
struct fjes_adapter *adapter = data;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/22] fjes: Hardware cleanup routine

2015-06-17 Thread Taku Izumi
This patch adds the hardware cleanup routine, which is
invoked from the driver's .remove routine.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_hw.c | 66 +
 drivers/platform/x86/fjes/fjes_hw.h |  1 +
 2 files changed, 67 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index 1731827..d1f090a 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -56,6 +56,11 @@ static u8 *fjes_hw_iomap(struct fjes_hw *hw)
return base;
 }
 
+static void fjes_hw_iounmap(struct fjes_hw *hw)
+{
+   iounmap(hw-base);
+   release_mem_region(hw-hw_res.start, hw-hw_res.size);
+}
 
 int fjes_hw_reset(struct fjes_hw *hw)
 {
@@ -113,6 +118,12 @@ static int fjes_hw_alloc_shared_status_region(struct 
fjes_hw *hw)
return 0;
 }
 
+static void fjes_hw_free_shared_status_region(struct fjes_hw *hw)
+{
+   kfree(hw-hw_info.share);
+   hw-hw_info.share = NULL;
+}
+
 static int fjes_hw_alloc_epbuf(struct epbuf_handler *epbh)
 {
void *mem;
@@ -131,6 +142,18 @@ static int fjes_hw_alloc_epbuf(struct epbuf_handler *epbh)
return 0;
 }
 
+static void fjes_hw_free_epbuf(struct epbuf_handler *epbh)
+{
+   if (epbh-buffer)
+   vfree(epbh-buffer);
+
+   epbh-buffer = NULL;
+   epbh-size = 0;
+
+   epbh-info = NULL;
+   epbh-ring = NULL;
+}
+
 void fjes_hw_setup_epbuf(struct epbuf_handler *epbh, u8 *mac_addr, u32 mtu)
 {
 
@@ -267,6 +290,33 @@ static int fjes_hw_setup(struct fjes_hw *hw)
return 0;
 }
 
+static void fjes_hw_cleanup(struct fjes_hw *hw)
+{
+   int epidx;
+
+   if (!hw-ep_shm_info)
+   return;
+
+   fjes_hw_free_shared_status_region(hw);
+
+   kfree(hw-hw_info.req_buf);
+   hw-hw_info.req_buf = NULL;
+
+   kfree(hw-hw_info.res_buf);
+   hw-hw_info.res_buf = NULL;
+
+   for (epidx = 0; epidx  hw-max_epid ; epidx++) {
+   if (epidx == hw-my_epid)
+   continue;
+   fjes_hw_free_epbuf(hw-ep_shm_info[epidx].tx);
+   fjes_hw_free_epbuf(hw-ep_shm_info[epidx].rx);
+   }
+
+   kfree(hw-ep_shm_info);
+   hw-ep_shm_info = NULL;
+
+}
+
 int fjes_hw_init(struct fjes_hw *hw)
 {
int ret;
@@ -294,6 +344,22 @@ int fjes_hw_init(struct fjes_hw *hw)
return ret;
 }
 
+void fjes_hw_exit(struct fjes_hw *hw)
+{
+   int ret;
+
+   if (hw-base) {
+   ret = fjes_hw_reset(hw);
+   if (ret)
+   pr_err(%s: reset error, __func__);
+
+   fjes_hw_iounmap(hw);
+   hw-base = NULL;
+   }
+
+   fjes_hw_cleanup(hw);
+}
+
 void fjes_hw_set_irqmask(struct fjes_hw *hw, enum REG_ICTL_MASK intr_mask,
bool mask)
 {
diff --git a/drivers/platform/x86/fjes/fjes_hw.h 
b/drivers/platform/x86/fjes/fjes_hw.h
index 02f4ee9..0bb2d51 100644
--- a/drivers/platform/x86/fjes/fjes_hw.h
+++ b/drivers/platform/x86/fjes/fjes_hw.h
@@ -244,6 +244,7 @@ struct fjes_hw {
 
 
 int fjes_hw_init(struct fjes_hw *);
+void fjes_hw_exit(struct fjes_hw *);
 int fjes_hw_reset(struct fjes_hw *);
 
 void fjes_hw_init_command_registers(struct fjes_hw *,
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/22] fjes: platform_driver's .probe and .remove routine

2015-06-17 Thread Taku Izumi
This patch implements the platform_driver's .probe and .remove
routines, and also adds the board-specific private data structure.

The driver registers its net_device in the platform_driver's .probe
routine and unregisters it in the .remove routine.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  | 27 ++
 drivers/platform/x86/fjes/fjes_main.c | 96 +++
 2 files changed, 123 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index 5586305..3515572 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -25,7 +25,34 @@
 
 #include linux/acpi.h
 
+#include fjes_hw.h
+
 #define FJES_ACPI_SYMBOL   Extended Socket
+#define FJES_MAX_QUEUES1
+#define FJES_TX_RETRY_INTERVAL (20 * HZ)
+
+
+/* board specific private data structure */
+struct fjes_adapter {
+
+   struct net_device *netdev;
+   struct platform_device *plat_dev;
+
+   struct napi_struct napi;
+   struct rtnl_link_stats64 stats64;
+
+   unsigned int tx_retry_count;
+   unsigned long tx_start_jiffies;
+   unsigned long rx_last_jiffies;
+   bool unset_rx_last;
+
+   bool force_reset;
+   bool open_guard;
+
+   bool irq_registered;
+
+   struct fjes_hw hw;
+};
 
 extern char fjes_driver_name[];
 extern char fjes_driver_version[];
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 258929a1..f38e0af 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -23,6 +23,7 @@
 #include linux/types.h
 #include linux/nls.h
 #include linux/platform_device.h
+#include linux/netdevice.h
 
 #include fjes.h
 
@@ -51,6 +52,9 @@ static acpi_status fjes_get_acpi_resource(struct 
acpi_resource *, void*);
 static int fjes_probe(struct platform_device *);
 static int fjes_remove(struct platform_device *);
 
+static int fjes_sw_init(struct fjes_adapter *);
+static void fjes_netdev_setup(struct net_device *);
+
 
 static const struct acpi_device_id fjes_acpi_ids[] = {
{PNP0C02, 0},
@@ -169,6 +173,9 @@ static acpi_status fjes_get_acpi_resource(struct 
acpi_resource *acpi_res,
return AE_OK;
 }
 
+static const struct net_device_ops fjes_netdev_ops = {
+};
+
 /*
  *  fjes_probe - Device Initialization Routine
  *
@@ -176,7 +183,67 @@ static acpi_status fjes_get_acpi_resource(struct 
acpi_resource *acpi_res,
  */
 static int fjes_probe(struct platform_device *plat_dev)
 {
+   struct net_device *netdev;
+   struct fjes_adapter *adapter;
+   struct fjes_hw *hw;
+   struct resource *res;
+   int err;
+
+   err = -ENOMEM;
+   netdev = alloc_netdev_mq(sizeof(struct fjes_adapter), es%d,
+   NET_NAME_UNKNOWN, fjes_netdev_setup, FJES_MAX_QUEUES);
+
+   if (!netdev)
+   goto err_alloc_netdev;
+
+   SET_NETDEV_DEV(netdev, plat_dev-dev);
+
+   dev_set_drvdata(plat_dev-dev, netdev);
+   adapter = netdev_priv(netdev);
+   adapter-netdev = netdev;
+   adapter-plat_dev = plat_dev;
+   hw = adapter-hw;
+   hw-back = adapter;
+
+   /* setup the private structure */
+   err = fjes_sw_init(adapter);
+   if (err)
+   goto err_sw_init;
+
+   adapter-force_reset = false;
+   adapter-open_guard = false;
+
+   res = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
+   hw-hw_res.start = res-start;
+   hw-hw_res.size = res-end - res-start;
+   hw-hw_res.irq = platform_get_irq(plat_dev, 0);
+   err = fjes_hw_init(adapter-hw);
+   if (err)
+   goto err_hw_init;
+
+   /* setup MAC address (02:00:00:00:00:[epid])*/
+   netdev-dev_addr[0] = 2;
+   netdev-dev_addr[1] = 0;
+   netdev-dev_addr[2] = 0;
+   netdev-dev_addr[3] = 0;
+   netdev-dev_addr[4] = 0;
+   netdev-dev_addr[5] = hw-my_epid; /* EPID */
+
+   err = register_netdev(netdev);
+   if (err)
+   goto err_register;
+
+   netif_carrier_off(netdev);
+
return 0;
+
+err_register:
+   fjes_hw_exit(adapter-hw);
+err_hw_init:
+err_sw_init:
+   free_netdev(netdev);
+err_alloc_netdev:
+   return err;
 }
 
 /*
@@ -184,9 +251,38 @@ static int fjes_probe(struct platform_device *plat_dev)
  */
 static int fjes_remove(struct platform_device *plat_dev)
 {
+   struct net_device *netdev = dev_get_drvdata(plat_dev-dev);
+   struct fjes_adapter *adapter = netdev_priv(netdev);
+   struct fjes_hw *hw = adapter-hw;
+
+   unregister_netdev(netdev);
+
+   fjes_hw_exit(hw);
+
+   free_netdev(netdev);
+
return 0;
 }
 
+static int fjes_sw_init(struct fjes_adapter *adapter)
+{
+   return 0;
+}
+
+/*
+ *  fjes_netdev_setup - netdevice initialization routine
+ */
+static void fjes_netdev_setup(struct net_device *netdev)
+{
+   ether_setup(netdev);
+
+   netdev-watchdog_timeo = 

[PATCH 11/22] fjes: NAPI polling function

2015-06-17 Thread Taku Izumi
This patch adds the NAPI polling function and the related receive handling.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_hw.c   |  46 +
 drivers/platform/x86/fjes/fjes_hw.h   |   6 ++
 drivers/platform/x86/fjes/fjes_main.c | 179 ++
 3 files changed, 231 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index 8be343f..da509bd 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -843,6 +843,52 @@ bool fjes_hw_check_vlan_id(struct epbuf_handler *epbh, u16 
vlan_id)
return ret;
 }
 
+bool fjes_hw_epbuf_rx_is_empty(struct epbuf_handler *epbh)
+{
+   union ep_buffer_info *info;
+   static bool log_output;
+
+   info = epbh-info;
+
+   if (info-v1i.count_max == 0) {
+   if (false == log_output)
+   log_output = true;
+   return true;
+   }
+   return EP_RING_EMPTY(info-v1i.head, info-v1i.tail,
+   info-v1i.count_max);
+}
+
+void *fjes_hw_epbuf_rx_curpkt_get_addr(struct epbuf_handler *epbh,
+   size_t *psize)
+{
+   union ep_buffer_info *info = epbh-info;
+   struct esmem_frame_t *ring_frame;
+   void *frame;
+
+   ring_frame =
+   (struct esmem_frame_t *)
+ (epbh-ring[EP_RING_INDEX(info-v1i.head,
+   info-v1i.count_max) *
+   info-v1i.frame_max]);
+
+   *psize = (size_t) ring_frame-frame_size;
+
+   frame = ring_frame-frame_data;
+
+   return frame;
+}
+
+void fjes_hw_epbuf_rx_curpkt_drop(struct epbuf_handler *epbh)
+{
+   union ep_buffer_info *info = epbh-info;
+
+   if (fjes_hw_epbuf_rx_is_empty(epbh))
+   return;
+
+   EP_RING_INDEX_INC(epbh-info-v1i.head, info-v1i.count_max);
+}
+
 int fjes_hw_epbuf_tx_pkt_send(struct epbuf_handler *epbh,
void *frame, size_t size)
 {
diff --git a/drivers/platform/x86/fjes/fjes_hw.h 
b/drivers/platform/x86/fjes/fjes_hw.h
index f6fdae5..1ad0fcb 100644
--- a/drivers/platform/x86/fjes/fjes_hw.h
+++ b/drivers/platform/x86/fjes/fjes_hw.h
@@ -68,6 +68,8 @@ struct fjes_hw;
((_num) = EP_RING_INDEX((_num) + 1, (_max)))
 #define EP_RING_FULL(_head, _tail, _max)   \
(0 == EP_RING_INDEX(((_tail) - (_head)), (_max)))
+#define EP_RING_EMPTY(_head, _tail, _max) \
+   (1 == EP_RING_INDEX(((_tail) - (_head)), (_max)))
 
 #define FJES_MTU_TO_BUFFER_SIZE(mtu) \
(ETH_HLEN + VLAN_HLEN + (mtu) + ETH_FCS_LEN)
@@ -323,6 +325,10 @@ int fjes_hw_epid_is_shared(struct fjes_device_shared_info 
*, int);
 bool fjes_hw_check_epbuf_version(struct epbuf_handler *, u32);
 bool fjes_hw_check_mtu(struct epbuf_handler *, u32);
 bool fjes_hw_check_vlan_id(struct epbuf_handler *, u16);
+bool fjes_hw_epbuf_rx_is_empty(struct epbuf_handler *);
+void *fjes_hw_epbuf_rx_curpkt_get_addr(struct epbuf_handler *,
+   size_t *);
+void fjes_hw_epbuf_rx_curpkt_drop(struct epbuf_handler *);
 int fjes_hw_epbuf_tx_pkt_send(struct epbuf_handler *, void *, size_t);
 
 #endif /* FJES_HW_H_ */
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index a2dddb2..97bf487 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -68,6 +68,9 @@ static int fjes_remove(struct platform_device *);
 static int fjes_sw_init(struct fjes_adapter *);
 static void fjes_netdev_setup(struct net_device *);
 
+static void fjes_rx_irq(struct fjes_adapter *, int);
+static int fjes_poll(struct napi_struct *, int);
+
 
 static const struct acpi_device_id fjes_acpi_ids[] = {
{PNP0C02, 0},
@@ -241,6 +244,8 @@ static int fjes_open(struct net_device *netdev)
hw-txrx_stop_req_bit = 0;
hw-epstop_req_bit = 0;
 
+   napi_enable(adapter-napi);
+
fjes_hw_capture_interrupt_status(hw);
 
result = fjes_request_irq(adapter);
@@ -256,6 +261,7 @@ static int fjes_open(struct net_device *netdev)
 
 err_req_irq:
fjes_free_irq(adapter);
+   napi_disable(adapter-napi);
 
 err_setup_res:
fjes_free_resources(adapter);
@@ -277,6 +283,8 @@ static int fjes_close(struct net_device *netdev)
 
fjes_hw_raise_epstop(hw);
 
+   napi_disable(adapter-napi);
+
for (epidx = 0; epidx  hw-max_epid; epidx++) {
if (epidx == hw-my_epid)
continue;
@@ -736,6 +744,10 @@ static irqreturn_t fjes_intr(int irq, void *data)
icr = fjes_hw_capture_interrupt_status(hw);
 
if (icr  REG_IS_MASK_IS_ASSERT) {
+
+   if (icr  REG_ICTL_MASK_RX_DATA)
+   fjes_rx_irq(adapter, icr  REG_IS_MASK_EPID);
+
ret = IRQ_HANDLED;
} else
ret = IRQ_NONE;
@@ -743,6 +755,167 @@ static irqreturn_t fjes_intr(int irq, void *data)
return ret;
 }
 
+static int 

[PATCH 16/22] fjes: interrupt_watch_task

2015-06-17 Thread Taku Izumi
This patch adds interrupt_watch_task.
This delayed work item invokes the interrupt handler itself, to guard
against interrupts that are delivered late.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  |  5 +
 drivers/platform/x86/fjes/fjes_main.c | 40 ++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index 1ca282c..1a880a0 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -33,6 +33,7 @@
 #define FJES_TX_RETRY_TIMEOUT  (100)
 #define FJES_TX_TX_STALL_TIMEOUT   (FJES_TX_RETRY_INTERVAL/2)
 #define FJES_OPEN_ZONE_UPDATE_WAIT (300) /* msec */
+#define FJES_IRQ_WATCH_DELAY   (HZ)
 
 /* board specific private data structure */
 struct fjes_adapter {
@@ -54,10 +55,14 @@ struct fjes_adapter {
bool irq_registered;
 
struct workqueue_struct *txrx_wq;
+   struct workqueue_struct *control_wq;
 
struct work_struct tx_stall_task;
struct work_struct raise_intr_rxdata_task;
 
+   struct delayed_work interrupt_watch_task;
+   bool interrupt_watch_enable;
+
struct fjes_hw hw;
 };
 
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index f5df457..8e78014 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -73,7 +73,7 @@ static int fjes_remove(struct platform_device *);
 
 static int fjes_sw_init(struct fjes_adapter *);
 static void fjes_netdev_setup(struct net_device *);
-
+static void fjes_irq_watch_task(struct work_struct *);
 static void fjes_rx_irq(struct fjes_adapter *, int);
 static int fjes_poll(struct napi_struct *, int);
 
@@ -200,6 +200,13 @@ static int fjes_request_irq(struct fjes_adapter *adapter)
struct net_device *netdev = adapter-netdev;
int result = -1;
 
+   adapter-interrupt_watch_enable = true;
+   if (!delayed_work_pending(adapter-interrupt_watch_task)) {
+   queue_delayed_work(adapter-control_wq,
+   adapter-interrupt_watch_task,
+   FJES_IRQ_WATCH_DELAY);
+   }
+
if (!adapter-irq_registered) {
result = request_irq(adapter-hw.hw_res.irq, fjes_intr,
IRQF_SHARED, netdev-name, adapter);
@@ -216,6 +223,8 @@ static void fjes_free_irq(struct fjes_adapter *adapter)
 {
struct fjes_hw *hw = adapter-hw;
 
+   adapter-interrupt_watch_enable = false;
+   cancel_delayed_work_sync(adapter-interrupt_watch_task);
 
fjes_hw_set_irqmask(hw, REG_ICTL_MASK_ALL, true);
 
@@ -306,6 +315,7 @@ static int fjes_close(struct net_device *netdev)
 
fjes_free_irq(adapter);
 
+   cancel_delayed_work_sync(adapter-interrupt_watch_task);
cancel_work_sync(adapter-raise_intr_rxdata_task);
cancel_work_sync(adapter-tx_stall_task);
 
@@ -1047,11 +1057,15 @@ static int fjes_probe(struct platform_device *plat_dev)
adapter-open_guard = false;
 
adapter-txrx_wq = create_workqueue(DRV_NAME/txrx);
+   adapter-control_wq = create_workqueue(DRV_NAME/control);
 
INIT_WORK(adapter-tx_stall_task, fjes_tx_stall_task);
INIT_WORK(adapter-raise_intr_rxdata_task,
fjes_raise_intr_rxdata_task);
 
+   INIT_DELAYED_WORK(adapter-interrupt_watch_task, fjes_irq_watch_task);
+   adapter-interrupt_watch_enable = false;
+
res = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
hw-hw_res.start = res-start;
hw-hw_res.size = res-end - res-start;
@@ -1094,8 +1108,11 @@ static int fjes_remove(struct platform_device *plat_dev)
struct fjes_adapter *adapter = netdev_priv(netdev);
struct fjes_hw *hw = adapter-hw;
 
+   cancel_delayed_work_sync(adapter-interrupt_watch_task);
cancel_work_sync(adapter-raise_intr_rxdata_task);
cancel_work_sync(adapter-tx_stall_task);
+   if (adapter-control_wq)
+   destroy_workqueue(adapter-control_wq);
if (adapter-txrx_wq)
destroy_workqueue(adapter-txrx_wq);
 
@@ -1133,6 +1150,27 @@ static void fjes_netdev_setup(struct net_device *netdev)
netdev-features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER;
 }
 
+static void fjes_irq_watch_task(struct work_struct *work)
+{
+   struct fjes_adapter *adapter = container_of(to_delayed_work(work),
+   struct fjes_adapter, interrupt_watch_task);
+
+   local_irq_disable();
+   fjes_intr(adapter-hw.hw_res.irq, adapter);
+   local_irq_enable();
+
+   if (fjes_rxframe_search_exist(adapter, 0) = 0)
+   napi_schedule(adapter-napi);
+
+
+   if (adapter-interrupt_watch_enable) {
+   if (!delayed_work_pending(adapter-interrupt_watch_task))
+   queue_delayed_work(adapter-control_wq,
+   adapter-interrupt_watch_task,
+  

[PATCH 09/22] fjes: raise_intr_rxdata_task

2015-06-17 Thread Taku Izumi
This patch adds raise_intr_rxdata_task.

The Extended Socket Network Device is shared-memory based, so one
endpoint's transmission is another endpoint's reception. To notify
the receivers, the sender has to raise an interrupt on each of them.
raise_intr_rxdata_task does this work.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  |  4 ++
 drivers/platform/x86/fjes/fjes_main.c | 69 +++
 2 files changed, 73 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index d8901aa..89b95c8 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -52,6 +52,10 @@ struct fjes_adapter {
 
bool irq_registered;
 
+   struct workqueue_struct *txrx_wq;
+
+   struct work_struct raise_intr_rxdata_task;
+
struct fjes_hw hw;
 };
 
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 0d4bca6..4d012f4 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -54,6 +54,7 @@ static int fjes_setup_resources(struct fjes_adapter *);
 static void fjes_free_resources(struct fjes_adapter *);
 static netdev_tx_t fjes_xmit_frame(struct sk_buff *,
struct net_device *);
+static void fjes_raise_intr_rxdata_task(struct work_struct *);
 static irqreturn_t fjes_intr(int, void*);
 
 static int fjes_acpi_add(struct acpi_device *);
@@ -285,6 +286,8 @@ static int fjes_close(struct net_device *netdev)
 
fjes_free_irq(adapter);
 
+   cancel_work_sync(adapter-raise_intr_rxdata_task);
+
fjes_hw_wait_epstop(hw);
 
fjes_free_resources(adapter);
@@ -419,6 +422,60 @@ static void fjes_free_resources(struct fjes_adapter 
*adapter)
 
 }
 
+static void fjes_raise_intr_rxdata_task(struct work_struct *work)
+{
+   struct fjes_adapter *adapter = container_of(work,
+   struct fjes_adapter, raise_intr_rxdata_task);
+   struct fjes_hw *hw = adapter-hw;
+   int epid;
+   int max_epid, my_epid;
+   enum ep_partner_status pstatus;
+
+   my_epid = hw-my_epid;
+   max_epid = hw-max_epid;
+
+   for (epid = 0; epid  max_epid; epid++)
+   hw-ep_shm_info[epid].tx_status_work = 0;
+
+   for (epid = 0; epid  max_epid; epid++) {
+   if (epid == my_epid)
+   continue;
+
+   pstatus = fjes_hw_get_partner_ep_status(hw, epid);
+   if (pstatus == EP_PARTNER_SHARED) {
+
+   hw-ep_shm_info[epid].tx_status_work =
+   hw-ep_shm_info[epid].tx.info-v1i.tx_status;
+
+   if (hw-ep_shm_info[epid].tx_status_work ==
+   FJES_TX_DELAY_SEND_PENDING) {
+   hw-ep_shm_info[epid].tx.info-v1i.tx_status =
+   FJES_TX_DELAY_SEND_NONE;
+   }
+   }
+
+   }
+
+   for (epid = 0; epid  max_epid; epid++) {
+   if (epid == my_epid)
+   continue;
+
+   pstatus = fjes_hw_get_partner_ep_status(hw, epid);
+   if ((hw-ep_shm_info[epid].tx_status_work ==
+   FJES_TX_DELAY_SEND_PENDING) 
+   (pstatus == EP_PARTNER_SHARED) 
+   !(hw-ep_shm_info[epid].rx.info-v1i.rx_status)) {
+
+   fjes_hw_raise_interrupt(hw, epid, 
REG_ICTL_MASK_RX_DATA);
+
+   }
+
+   }
+
+   usleep_range(500, 1000);
+
+}
+
 static int fjes_tx_send(struct fjes_adapter *adapter, int dest,
void *data, size_t len)
 {
@@ -431,6 +488,9 @@ static int fjes_tx_send(struct fjes_adapter *adapter, int 
dest,
 
adapter-hw.ep_shm_info[dest].tx.info-v1i.tx_status =
FJES_TX_DELAY_SEND_PENDING;
+   if (!work_pending(adapter-raise_intr_rxdata_task))
+   queue_work(adapter-txrx_wq,
+  adapter-raise_intr_rxdata_task);
 
retval = 0;
return retval;
@@ -660,6 +720,11 @@ static int fjes_probe(struct platform_device *plat_dev)
adapter-force_reset = false;
adapter-open_guard = false;
 
+   adapter-txrx_wq = create_workqueue(DRV_NAME/txrx);
+
+   INIT_WORK(adapter-raise_intr_rxdata_task,
+   fjes_raise_intr_rxdata_task);
+
res = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
hw-hw_res.start = res-start;
hw-hw_res.size = res-end - res-start;
@@ -702,6 +767,10 @@ static int fjes_remove(struct platform_device *plat_dev)
struct fjes_adapter *adapter = netdev_priv(netdev);
struct fjes_hw *hw = adapter-hw;
 
+   cancel_work_sync(adapter-raise_intr_rxdata_task);
+   if (adapter-txrx_wq)
+   destroy_workqueue(adapter-txrx_wq);
+
unregister_netdev(netdev);
 
fjes_hw_exit(hw);
-- 

[PATCH 20/22] fjes: epstop_task

2015-06-17 Thread Taku Izumi
This patch adds epstop_task.
This task is used to process cancellation requests
from other receivers.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_hw.c   | 34 ++
 drivers/platform/x86/fjes/fjes_hw.h   |  1 +
 drivers/platform/x86/fjes/fjes_main.c |  1 +
 3 files changed, 36 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index e07b266..c22679a 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -23,6 +23,7 @@
 #include fjes.h
 
 static void fjes_hw_update_zone_task(struct work_struct *);
+static void fjes_hw_epstop_task(struct work_struct *);
 
 /* supported MTU list */
 u32 fjes_support_mtu[] = {
@@ -334,6 +335,7 @@ int fjes_hw_init(struct fjes_hw *hw)
fjes_hw_set_irqmask(hw, REG_ICTL_MASK_ALL, true);
 
INIT_WORK(hw-update_zone_task, fjes_hw_update_zone_task);
+   INIT_WORK(hw-epstop_task, fjes_hw_epstop_task);
 
mutex_init(hw-hw_info.lock);
 
@@ -364,6 +366,7 @@ void fjes_hw_exit(struct fjes_hw *hw)
fjes_hw_cleanup(hw);
 
cancel_work_sync(hw-update_zone_task);
+   cancel_work_sync(hw-epstop_task);
 }
 
 static enum fjes_dev_command_response_e fjes_hw_issue_request_command(
@@ -1123,3 +1126,34 @@ static void fjes_hw_update_zone_task(struct work_struct 
*work)
}
 }
 
+static void fjes_hw_epstop_task(struct work_struct *work)
+{
+   struct fjes_hw *hw = container_of(work,
+   struct fjes_hw, epstop_task);
+   struct fjes_adapter *adapter = (struct fjes_adapter *)hw-back;
+   int epid_bit;
+   unsigned long remain_bit;
+
+   while ((remain_bit = hw-epstop_req_bit)) {
+
+   for (epid_bit = 0; remain_bit; (remain_bit = 1),
+   (epid_bit++)) {
+
+   if (remain_bit  1) {
+
+   hw-ep_shm_info[epid_bit].
+   tx.info-v1i.rx_status |=
+   FJES_RX_STOP_REQ_DONE;
+
+   clear_bit(epid_bit, hw-epstop_req_bit);
+   set_bit(epid_bit,
+   adapter-unshare_watch_bitmask);
+
+   if (!work_pending(adapter-unshare_watch_task))
+   queue_work(adapter-control_wq,
+   adapter-unshare_watch_task);
+   }
+   }
+   }
+}
+
diff --git a/drivers/platform/x86/fjes/fjes_hw.h 
b/drivers/platform/x86/fjes/fjes_hw.h
index 1b0afc0..e242a04 100644
--- a/drivers/platform/x86/fjes/fjes_hw.h
+++ b/drivers/platform/x86/fjes/fjes_hw.h
@@ -285,6 +285,7 @@ struct fjes_hw {
unsigned long txrx_stop_req_bit;
unsigned long epstop_req_bit;
struct work_struct update_zone_task;
+   struct work_struct epstop_task;
 
int my_epid;
int max_epid;
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 900aa65..f42441f 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -324,6 +324,7 @@ static int fjes_close(struct net_device *netdev)
cancel_work_sync(adapter-tx_stall_task);
 
cancel_work_sync(hw-update_zone_task);
+   cancel_work_sync(hw-epstop_task);
 
fjes_hw_wait_epstop(hw);
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/22] fjes: net_device_ops.ndo_vlan_rx_add/kill_vid

2015-06-17 Thread Taku Izumi
This patch adds the net_device_ops.ndo_vlan_rx_add_vid and
net_device_ops.ndo_vlan_rx_kill_vid callbacks.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_hw.c   | 27 +++
 drivers/platform/x86/fjes/fjes_hw.h   |  2 ++
 drivers/platform/x86/fjes/fjes_main.c | 40 +++
 3 files changed, 69 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index da509bd..85f9693 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -843,6 +843,33 @@ bool fjes_hw_check_vlan_id(struct epbuf_handler *epbh, u16 
vlan_id)
return ret;
 }
 
+bool fjes_hw_set_vlan_id(struct epbuf_handler *epbh, u16 vlan_id)
+{
+   union ep_buffer_info *info = epbh-info;
+   int i;
+
+   for (i = 0; i  EP_BUFFER_SUPPORT_VLAN_MAX; i++) {
+   if (info-v1i.vlan_id[i] == 0) {
+   info-v1i.vlan_id[i] = vlan_id;
+   return true;
+   }
+   }
+   return false;
+}
+
+void fjes_hw_del_vlan_id(struct epbuf_handler *epbh, u16 vlan_id)
+{
+   union ep_buffer_info *info = epbh-info;
+   int i;
+
+   if (0 != vlan_id) {
+   for (i = 0; i  EP_BUFFER_SUPPORT_VLAN_MAX; i++) {
+   if (vlan_id == info-v1i.vlan_id[i])
+   info-v1i.vlan_id[i] = 0;
+   }
+   }
+}
+
 bool fjes_hw_epbuf_rx_is_empty(struct epbuf_handler *epbh)
 {
union ep_buffer_info *info;
diff --git a/drivers/platform/x86/fjes/fjes_hw.h 
b/drivers/platform/x86/fjes/fjes_hw.h
index 1ad0fcb..14e8db9 100644
--- a/drivers/platform/x86/fjes/fjes_hw.h
+++ b/drivers/platform/x86/fjes/fjes_hw.h
@@ -325,6 +325,8 @@ int fjes_hw_epid_is_shared(struct fjes_device_shared_info 
*, int);
 bool fjes_hw_check_epbuf_version(struct epbuf_handler *, u32);
 bool fjes_hw_check_mtu(struct epbuf_handler *, u32);
 bool fjes_hw_check_vlan_id(struct epbuf_handler *, u16);
+bool fjes_hw_set_vlan_id(struct epbuf_handler *, u16);
+void fjes_hw_del_vlan_id(struct epbuf_handler *, u16);
 bool fjes_hw_epbuf_rx_is_empty(struct epbuf_handler *);
 void *fjes_hw_epbuf_rx_curpkt_get_addr(struct epbuf_handler *,
size_t *);
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 84727d8..f5df457 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -60,6 +60,8 @@ static irqreturn_t fjes_intr(int, void*);
 static struct rtnl_link_stats64
 *fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *);
 static int fjes_change_mtu(struct net_device *, int);
+static int fjes_vlan_rx_add_vid(struct net_device *, __be16 proto, u16);
+static int fjes_vlan_rx_kill_vid(struct net_device *, __be16 proto, u16);
 static void fjes_tx_retry(struct net_device *);
 
 static int fjes_acpi_add(struct acpi_device *);
@@ -230,6 +232,8 @@ static const struct net_device_ops fjes_netdev_ops = {
.ndo_get_stats64= fjes_get_stats64,
.ndo_change_mtu = fjes_change_mtu,
.ndo_tx_timeout = fjes_tx_retry,
+   .ndo_vlan_rx_add_vid= fjes_vlan_rx_add_vid,
+   .ndo_vlan_rx_kill_vid = fjes_vlan_rx_kill_vid,
 };
 
 /*
@@ -787,6 +791,42 @@ static int fjes_change_mtu(struct net_device *netdev, int 
new_mtu)
return -EINVAL;
 }
 
+static int fjes_vlan_rx_add_vid(struct net_device *netdev,
+   __be16 proto, u16 vid)
+{
+   struct fjes_adapter *adapter = netdev_priv(netdev);
+   bool ret = true;
+   int epid;
+
+   for (epid = 0; epid  adapter-hw.max_epid; epid++) {
+   if (epid == adapter-hw.my_epid)
+   continue;
+
+   if (!fjes_hw_check_vlan_id(
+   (adapter-hw.ep_shm_info[epid].tx), vid))
+   ret = fjes_hw_set_vlan_id(
+   (adapter-hw.ep_shm_info[epid].tx), vid);
+   }
+
+   return ret ? 0 : -ENOSPC;
+}
+
+static int fjes_vlan_rx_kill_vid(struct net_device *netdev,
+   __be16 proto, u16 vid)
+{
+   struct fjes_adapter *adapter = netdev_priv(netdev);
+   int epid;
+
+   for (epid = 0; epid  adapter-hw.max_epid; epid++) {
+   if (epid == adapter-hw.my_epid)
+   continue;
+
+   fjes_hw_del_vlan_id((adapter-hw.ep_shm_info[epid].tx), vid);
+   }
+
+   return 0;
+}
+
 static irqreturn_t fjes_intr(int irq, void *data)
 {
struct fjes_adapter *adapter = data;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/22] fjes: buffer address regist/unregistration routine

2015-06-17 Thread Taku Izumi
This patch adds the buffer address registration/unregistration routines.

These functions are mainly invoked on network device
activation (open) and deactivation (close)
in order to register/unregister the shared buffer address.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_hw.c | 189 
 drivers/platform/x86/fjes/fjes_hw.h |   9 +-
 2 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index eb04d9a..5c68541 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -461,6 +461,195 @@ int fjes_hw_request_info(struct fjes_hw *hw)
return result;
 }
 
+int fjes_hw_register_buff_addr(struct fjes_hw *hw, int dest_epid,
+   struct ep_share_mem_info *buf_pair)
+{
+   union fjes_device_command_req *req_buf = hw-hw_info.req_buf;
+   union fjes_device_command_res *res_buf = hw-hw_info.res_buf;
+   enum fjes_dev_command_response_e ret;
+   int i, idx;
+   int page_count;
+   void *addr;
+   int timeout;
+   int result;
+
+   if (test_bit(dest_epid, hw-hw_info.buffer_share_bit))
+   return 0;
+
+   memset(req_buf, 0, hw-hw_info.req_buf_size);
+   memset(res_buf, 0, hw-hw_info.res_buf_size);
+
+   req_buf-share_buffer.length =
+   FJES_DEV_COMMAND_SHARE_BUFFER_REQ_LEN(buf_pair-tx.size,
+   buf_pair-rx.size);
+   req_buf-share_buffer.epid = dest_epid;
+
+   idx = 0;
+   req_buf-share_buffer.buffer[idx++] = buf_pair-tx.size;
+   page_count = buf_pair-tx.size / EP_BUFFER_INFO_SIZE;
+   for (i = 0; i  page_count; i++) {
+   addr = ((u8 *)(buf_pair-tx.buffer)) +
+   (i * EP_BUFFER_INFO_SIZE);
+   req_buf-share_buffer.buffer[idx++] =
+   (__le64)(page_to_phys(vmalloc_to_page(addr)) +
+   offset_in_page(addr));
+   }
+
+   req_buf-share_buffer.buffer[idx++] = buf_pair-rx.size;
+   page_count = buf_pair-rx.size / EP_BUFFER_INFO_SIZE;
+   for (i = 0; i  page_count; i++) {
+   addr = ((u8 *)(buf_pair-rx.buffer)) +
+   (i * EP_BUFFER_INFO_SIZE);
+   req_buf-share_buffer.buffer[idx++] =
+   (__le64)(page_to_phys(vmalloc_to_page(addr)) +
+   offset_in_page(addr));
+   }
+
+   res_buf-share_buffer.length = 0;
+   res_buf-share_buffer.code = 0;
+
+   ret = fjes_hw_issue_request_command(hw, FJES_CMD_REQ_SHARE_BUFFER);
+
+   timeout = FJES_COMMAND_REQ_BUFF_TIMEOUT * 1000;
+   while ((ret == FJES_CMD_STATUS_NORMAL) 
+  (res_buf-share_buffer.length ==
+  FJES_DEV_COMMAND_SHARE_BUFFER_RES_LEN) 
+  (res_buf-share_buffer.code == FJES_CMD_REQ_RES_CODE_BUSY) 
+  (timeout  0)) {
+
+   msleep(200 + hw-my_epid * 20);
+   timeout -= (200 + hw-my_epid * 20);
+
+   res_buf-share_buffer.length = 0;
+   res_buf-share_buffer.code = 0;
+
+   ret = fjes_hw_issue_request_command(hw,
+   FJES_CMD_REQ_SHARE_BUFFER);
+   }
+
+   result = 0;
+
+   if (res_buf-share_buffer.length !=
+   FJES_DEV_COMMAND_SHARE_BUFFER_RES_LEN)
+   result = -ENOMSG;
+   else if (ret == FJES_CMD_STATUS_NORMAL) {
+   switch (res_buf-share_buffer.code) {
+   case FJES_CMD_REQ_RES_CODE_NORMAL:
+   result = 0;
+   set_bit(dest_epid, hw-hw_info.buffer_share_bit);
+   break;
+   case FJES_CMD_REQ_RES_CODE_BUSY:
+   result = -EBUSY;
+   break;
+   default:
+   result = -EPERM;
+   break;
+   }
+   } else {
+   switch (ret) {
+   case FJES_CMD_STATUS_UNKNOWN:
+   result = -EPERM;
+   break;
+   case FJES_CMD_STATUS_TIMEOUT:
+   result = -EBUSY;
+   break;
+   case FJES_CMD_STATUS_ERROR_PARAM:
+   case FJES_CMD_STATUS_ERROR_STATUS:
+   default:
+   result = -EPERM;
+   break;
+   }
+   }
+
+   return result;
+}
+
+int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid)
+{
+   union fjes_device_command_req *req_buf = hw-hw_info.req_buf;
+   union fjes_device_command_res *res_buf = hw-hw_info.res_buf;
+   struct fjes_device_shared_info *share = hw-hw_info.share;
+   enum fjes_dev_command_response_e 

[PATCH 10/22] fjes: tx_stall_task

2015-06-17 Thread Taku Izumi
This patch adds tx_stall_task.
When the receiver's buffer is full, the sender stops
its tx queue. This task is used to monitor the
receiver's status, and when the receiver's buffer
is available again, it resumes the tx queue.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  |  2 ++
 drivers/platform/x86/fjes/fjes_main.c | 62 +++
 2 files changed, 64 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index 89b95c8..1ca282c 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -31,6 +31,7 @@
 #define FJES_MAX_QUEUES1
 #define FJES_TX_RETRY_INTERVAL (20 * HZ)
 #define FJES_TX_RETRY_TIMEOUT  (100)
+#define FJES_TX_TX_STALL_TIMEOUT   (FJES_TX_RETRY_INTERVAL/2)
 #define FJES_OPEN_ZONE_UPDATE_WAIT (300) /* msec */
 
 /* board specific private data structure */
@@ -54,6 +55,7 @@ struct fjes_adapter {
 
struct workqueue_struct *txrx_wq;
 
+   struct work_struct tx_stall_task;
struct work_struct raise_intr_rxdata_task;
 
struct fjes_hw hw;
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 4d012f4..a2dddb2 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -55,6 +55,7 @@ static void fjes_free_resources(struct fjes_adapter *);
 static netdev_tx_t fjes_xmit_frame(struct sk_buff *,
struct net_device *);
 static void fjes_raise_intr_rxdata_task(struct work_struct *);
+static void fjes_tx_stall_task(struct work_struct *);
 static irqreturn_t fjes_intr(int, void*);
 
 static int fjes_acpi_add(struct acpi_device *);
@@ -287,6 +288,7 @@ static int fjes_close(struct net_device *netdev)
fjes_free_irq(adapter);
 
cancel_work_sync(adapter-raise_intr_rxdata_task);
+   cancel_work_sync(adapter-tx_stall_task);
 
fjes_hw_wait_epstop(hw);
 
@@ -422,6 +424,62 @@ static void fjes_free_resources(struct fjes_adapter 
*adapter)
 
 }
 
+static void fjes_tx_stall_task(struct work_struct *work)
+{
+   struct fjes_adapter *adapter = container_of(work,
+   struct fjes_adapter, tx_stall_task);
+   struct fjes_hw *hw = adapter-hw;
+   struct net_device *netdev = adapter-netdev;
+   enum ep_partner_status pstatus;
+   int epid;
+   int max_epid, my_epid;
+   union ep_buffer_info *info;
+   int all_queue_available;
+   int i;
+   int sendable;
+
+   if (((long)jiffies -
+   (long)(netdev-trans_start))  FJES_TX_TX_STALL_TIMEOUT) {
+   netif_wake_queue(netdev);
+   return;
+   }
+
+   my_epid = hw-my_epid;
+   max_epid = hw-max_epid;
+
+   for (i = 0; i  5; i++) {
+   all_queue_available = 1;
+
+   for (epid = 0; epid  max_epid; epid++) {
+
+   if (my_epid == epid)
+   continue;
+
+   pstatus = fjes_hw_get_partner_ep_status(hw, epid);
+   sendable = (pstatus == EP_PARTNER_SHARED);
+   if (!sendable)
+   continue;
+
+   info = adapter-hw.ep_shm_info[epid].tx.info;
+
+   if (EP_RING_FULL(info-v1i.head, info-v1i.tail,
+   info-v1i.count_max)) {
+   all_queue_available = 0;
+   break;
+   }
+   }
+
+   if (all_queue_available) {
+   netif_wake_queue(netdev);
+   return;
+   }
+   }
+
+   usleep_range(50, 100);
+
+   queue_work(adapter-txrx_wq, adapter-tx_stall_task);
+}
+
 static void fjes_raise_intr_rxdata_task(struct work_struct *work)
 {
struct fjes_adapter *adapter = container_of(work,
@@ -633,6 +691,8 @@ static netdev_tx_t fjes_xmit_frame(struct sk_buff *skb,
 
netdev-trans_start = jiffies;
netif_tx_stop_queue(cur_queue);
+   if 
(!work_pending(adapter-tx_stall_task))
+   queue_work(adapter-txrx_wq, 
adapter-tx_stall_task);
 
ret = NETDEV_TX_BUSY;
}
@@ -722,6 +782,7 @@ static int fjes_probe(struct platform_device *plat_dev)
 
adapter-txrx_wq = create_workqueue(DRV_NAME/txrx);
 
+   INIT_WORK(adapter-tx_stall_task, fjes_tx_stall_task);
INIT_WORK(adapter-raise_intr_rxdata_task,
fjes_raise_intr_rxdata_task);
 
@@ -768,6 +829,7 @@ static int fjes_remove(struct platform_device *plat_dev)
struct fjes_hw *hw = adapter-hw;
 
cancel_work_sync(adapter-raise_intr_rxdata_task);
+   cancel_work_sync(adapter-tx_stall_task);
if 

[PATCH 13/22] fjes: net_device_ops.ndo_change_mtu

2015-06-17 Thread Taku Izumi
This patch adds net_device_ops.ndo_change_mtu.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes_main.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index eeda824..72541a7 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -59,6 +59,7 @@ static void fjes_tx_stall_task(struct work_struct *);
 static irqreturn_t fjes_intr(int, void*);
 static struct rtnl_link_stats64
 *fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *);
+static int fjes_change_mtu(struct net_device *, int);
 
 static int fjes_acpi_add(struct acpi_device *);
 static int fjes_acpi_remove(struct acpi_device *);
@@ -226,6 +227,7 @@ static const struct net_device_ops fjes_netdev_ops = {
.ndo_stop   = fjes_close,
.ndo_start_xmit = fjes_xmit_frame,
.ndo_get_stats64= fjes_get_stats64,
+   .ndo_change_mtu = fjes_change_mtu,
 };
 
 /*
@@ -748,6 +750,34 @@ static struct rtnl_link_stats64
return stats;
 }
 
+static int fjes_change_mtu(struct net_device *netdev, int new_mtu)
+{
+   int idx;
+   bool running = netif_running(netdev);
+   int ret = 0;
+
+   for (idx = 0; fjes_support_mtu[idx] != 0; idx++) {
+   if (new_mtu = fjes_support_mtu[idx]) {
+
+   new_mtu = fjes_support_mtu[idx];
+   if (new_mtu == netdev-mtu)
+   return 0;
+
+   if (running)
+   fjes_close(netdev);
+
+   netdev-mtu = new_mtu;
+
+   if (running)
+   ret = fjes_open(netdev);
+
+   return ret;
+   }
+   }
+
+   return -EINVAL;
+}
+
 static irqreturn_t fjes_intr(int irq, void *data)
 {
struct fjes_adapter *adapter = data;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/22] fjes: net_device_ops.ndo_start_xmit

2015-06-17 Thread Taku Izumi
This patch adds net_device_ops.ndo_start_xmit callback,
which is called when sending packets.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  |   1 +
 drivers/platform/x86/fjes/fjes_hw.c   |  58 +++
 drivers/platform/x86/fjes/fjes_hw.h   |  12 +++
 drivers/platform/x86/fjes/fjes_main.c | 190 ++
 4 files changed, 261 insertions(+)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index 7f03eb7..d8901aa 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -30,6 +30,7 @@
 #define FJES_ACPI_SYMBOL   Extended Socket
 #define FJES_MAX_QUEUES1
 #define FJES_TX_RETRY_INTERVAL (20 * HZ)
+#define FJES_TX_RETRY_TIMEOUT  (100)
 #define FJES_OPEN_ZONE_UPDATE_WAIT (300) /* msec */
 
 /* board specific private data structure */
diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index e2137d8..8be343f 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -809,3 +809,61 @@ int fjes_hw_wait_epstop(struct fjes_hw *hw)
return (wait_time  FJES_COMMAND_EPSTOP_WAIT_TIMEOUT * 1000)
? 0 : -EBUSY;
 }
+
+bool fjes_hw_check_epbuf_version(struct epbuf_handler *epbh, u32 version)
+{
+   union ep_buffer_info *info = epbh-info;
+
+   return (info-common.version == version);
+}
+
+bool fjes_hw_check_mtu(struct epbuf_handler *epbh, u32 mtu)
+{
+   union ep_buffer_info *info = epbh-info;
+
+   return (info-v1i.frame_max == FJES_MTU_TO_FRAME_SIZE(mtu));
+}
+
+bool fjes_hw_check_vlan_id(struct epbuf_handler *epbh, u16 vlan_id)
+{
+   union ep_buffer_info *info = epbh-info;
+   int i;
+   bool ret = false;
+
+   if (vlan_id == 0) {
+   ret = true;
+   } else {
+   for (i = 0; i  EP_BUFFER_SUPPORT_VLAN_MAX; i++) {
+   if (vlan_id == info-v1i.vlan_id[i]) {
+   ret = true;
+   break;
+   }
+   }
+   }
+   return ret;
+}
+
+int fjes_hw_epbuf_tx_pkt_send(struct epbuf_handler *epbh,
+   void *frame, size_t size)
+{
+   union ep_buffer_info *info = epbh-info;
+   struct esmem_frame_t *ring_frame;
+
+   if (EP_RING_FULL(info-v1i.head, info-v1i.tail, info-v1i.count_max))
+   return -ENOBUFS;
+
+   ring_frame =
+   (struct esmem_frame_t *)(epbh-
+ ring[EP_RING_INDEX
+  (info-v1i.tail - 1,
+   info-v1i.count_max) *
+  info-v1i.frame_max]);
+
+   ring_frame-frame_size = size;
+   memcpy((void *)(ring_frame-frame_data), (void *)frame, size);
+
+   EP_RING_INDEX_INC(epbh-info-v1i.tail, info-v1i.count_max);
+
+   return 0;
+}
+
diff --git a/drivers/platform/x86/fjes/fjes_hw.h 
b/drivers/platform/x86/fjes/fjes_hw.h
index 170bcfb..f6fdae5 100644
--- a/drivers/platform/x86/fjes/fjes_hw.h
+++ b/drivers/platform/x86/fjes/fjes_hw.h
@@ -49,6 +49,9 @@ struct fjes_hw;
 
 #define FJES_ZONING_ZONE_TYPE_NONE (0xFF)
 
+#define FJES_TX_DELAY_SEND_NONE(0)
+#define FJES_TX_DELAY_SEND_PENDING (1)
+
 #define FJES_RX_STOP_REQ_NONE  (0x0)
 #define FJES_RX_STOP_REQ_DONE  (0x1)
 #define FJES_RX_STOP_REQ_REQUEST   (0x2)
@@ -60,6 +63,11 @@ struct fjes_hw;
 
 #define EP_RING_NUM(buffer_size, frame_size) \
(u32)((buffer_size) / (frame_size))
+#define EP_RING_INDEX(_num, _max) (((_num) + (_max)) % (_max))
+#define EP_RING_INDEX_INC(_num, _max) \
+   ((_num) = EP_RING_INDEX((_num) + 1, (_max)))
+#define EP_RING_FULL(_head, _tail, _max)   \
+   (0 == EP_RING_INDEX(((_tail) - (_head)), (_max)))
 
 #define FJES_MTU_TO_BUFFER_SIZE(mtu) \
(ETH_HLEN + VLAN_HLEN + (mtu) + ETH_FCS_LEN)
@@ -312,5 +320,9 @@ enum ep_partner_status
 
 bool fjes_hw_epid_is_same_zone(struct fjes_hw *, int);
 int fjes_hw_epid_is_shared(struct fjes_device_shared_info *, int);
+bool fjes_hw_check_epbuf_version(struct epbuf_handler *, u32);
+bool fjes_hw_check_mtu(struct epbuf_handler *, u32);
+bool fjes_hw_check_vlan_id(struct epbuf_handler *, u16);
+int fjes_hw_epbuf_tx_pkt_send(struct epbuf_handler *, void *, size_t);
 
 #endif /* FJES_HW_H_ */
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
index 14a72ec..0d4bca6 100644
--- a/drivers/platform/x86/fjes/fjes_main.c
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -52,6 +52,8 @@ static int fjes_open(struct net_device *);
 static int fjes_close(struct net_device *);
 static int fjes_setup_resources(struct fjes_adapter *);
 static void fjes_free_resources(struct fjes_adapter *);
+static netdev_tx_t fjes_xmit_frame(struct sk_buff *,
+   struct 

[PATCH 07/22] fjes: net_device_ops.ndo_open and .ndo_stop

2015-06-17 Thread Taku Izumi
This patch adds the net_device_ops.ndo_open and .ndo_stop
callbacks. These functions are called on network device
activation and deactivation.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/fjes/fjes.h  |   2 +-
 drivers/platform/x86/fjes/fjes_hw.c   | 150 
 drivers/platform/x86/fjes/fjes_hw.h   |  30 
 drivers/platform/x86/fjes/fjes_main.c | 257 ++
 drivers/platform/x86/fjes/fjes_regs.h |  16 +++
 5 files changed, 454 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
index 3515572..7f03eb7 100644
--- a/drivers/platform/x86/fjes/fjes.h
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -30,7 +30,7 @@
 #define FJES_ACPI_SYMBOL   Extended Socket
 #define FJES_MAX_QUEUES1
 #define FJES_TX_RETRY_INTERVAL (20 * HZ)
-
+#define FJES_OPEN_ZONE_UPDATE_WAIT (300) /* msec */
 
 /* board specific private data structure */
 struct fjes_adapter {
diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
b/drivers/platform/x86/fjes/fjes_hw.c
index 5c68541..e2137d8 100644
--- a/drivers/platform/x86/fjes/fjes_hw.c
+++ b/drivers/platform/x86/fjes/fjes_hw.c
@@ -564,6 +564,7 @@ int fjes_hw_register_buff_addr(struct fjes_hw *hw, int 
dest_epid,
return result;
 }
 
+
 int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int dest_epid)
 {
union fjes_device_command_req *req_buf = hw-hw_info.req_buf;
@@ -650,6 +651,25 @@ int fjes_hw_unregister_buff_addr(struct fjes_hw *hw, int 
dest_epid)
return result;
 }
 
+int fjes_hw_raise_interrupt(struct fjes_hw *hw, int dest_epid,
+   enum REG_ICTL_MASK  mask)
+{
+   u32 ig = mask | dest_epid;
+
+   wr32(XSCT_IG, cpu_to_le32(ig));
+
+   return 0;
+}
+
+u32 fjes_hw_capture_interrupt_status(struct fjes_hw *hw)
+{
+   u32 cur_is;
+
+   cur_is = rd32(XSCT_IS);
+
+   return cur_is;
+}
+
 void fjes_hw_set_irqmask(struct fjes_hw *hw, enum REG_ICTL_MASK intr_mask,
bool mask)
 {
@@ -659,3 +679,133 @@ void fjes_hw_set_irqmask(struct fjes_hw *hw, enum 
REG_ICTL_MASK intr_mask,
wr32(XSCT_IMC, intr_mask);
 }
 
+bool fjes_hw_epid_is_same_zone(struct fjes_hw *hw, int epid)
+{
+   if (epid = hw-max_epid)
+   return false;
+
+   if ((hw-ep_shm_info[epid].es_status !=
+   FJES_ZONING_STATUS_ENABLE) ||
+   (hw-ep_shm_info[hw-my_epid].zone ==
+   FJES_ZONING_ZONE_TYPE_NONE))
+   return false;
+   else
+   return (hw-ep_shm_info[epid].zone ==
+   hw-ep_shm_info[hw-my_epid].zone);
+
+}
+
+int fjes_hw_epid_is_shared(struct fjes_device_shared_info *share,
+   int dest_epid)
+{
+   int value = false;
+
+   if (dest_epid  share-epnum)
+   value = share-ep_status[dest_epid];
+
+   return value;
+}
+
+static bool fjes_hw_epid_is_stop_requested(struct fjes_hw *hw, int src_epid)
+{
+   return test_bit(src_epid, hw-txrx_stop_req_bit);
+}
+
+static bool fjes_hw_epid_is_stop_process_done(struct fjes_hw *hw, int src_epid)
+{
+   return (hw-ep_shm_info[src_epid].tx.info-v1i.rx_status 
+   FJES_RX_STOP_REQ_DONE);
+}
+
+enum ep_partner_status
+fjes_hw_get_partner_ep_status(struct fjes_hw *hw, int epid)
+{
+   enum ep_partner_status status;
+
+   if (fjes_hw_epid_is_shared(hw-hw_info.share, epid)) {
+   if (fjes_hw_epid_is_stop_requested(hw, epid))
+   status = EP_PARTNER_WAITING;
+   else {
+   if (fjes_hw_epid_is_stop_process_done(hw, epid))
+   status = EP_PARTNER_COMPLETE;
+   else
+   status = EP_PARTNER_SHARED;
+   }
+   } else {
+   status = EP_PARTNER_UNSHARE;
+   }
+
+   return status;
+}
+
+void fjes_hw_raise_epstop(struct fjes_hw *hw)
+{
+   int epidx;
+   enum ep_partner_status status;
+
+   for (epidx = 0; epidx  hw-max_epid; epidx++) {
+   if (epidx == hw-my_epid)
+   continue;
+
+   status = fjes_hw_get_partner_ep_status(hw, epidx);
+   switch (status) {
+   case EP_PARTNER_SHARED:
+   fjes_hw_raise_interrupt(hw, epidx,
+   REG_ICTL_MASK_TXRX_STOP_REQ);
+   break;
+   default:
+   break;
+   }
+
+   set_bit(epidx, hw-hw_info.buffer_unshare_reserve_bit);
+   set_bit(epidx, hw-txrx_stop_req_bit);
+
+   hw-ep_shm_info[epidx].tx.info-v1i.rx_status |=
+   FJES_RX_STOP_REQ_REQUEST;
+
+   }
+
+}
+
+int fjes_hw_wait_epstop(struct fjes_hw *hw)
+{
+   int epidx;
+   int wait_time = 0;
+   enum ep_partner_status status;
+   union 

Re: [PATCH 3/3] net: dsa: Allow configuration of CPU DSA port speeds/duplex

2015-06-17 Thread Andrew Lunn
On Wed, Jun 17, 2015 at 02:09:52PM -0400, Vivien Didelot wrote:
 Hi Andrew, All,
 
 On 12/06/15 10:18, Andrew Lunn wrote:
  By default, DSA and CPU ports are configured to the maximum speed the
  switch supports. However there can be use cases where the peer device
  port is slower. Allow a fixed-link property to be used with the DSA
  and CPU port in the device tree, and use this information to configure
  the port.
 
 Would it be a good idea for DSA to expose the cpu port to userspace as well?
 That way, it'd be possible to use ethtool to set the port speed and duplex
 mode, or dump registers (this would have saved me quite some time in dev).

I have code which expose these via debugfs. So far, i have all
registers, stats, ATU, and the scratch registers. For the patches to
apply cleanly, they depend on these patches, so i've not posted them
yet.

I'm not strongly against having a CPU port, but i don't particularly
like having the CPU port as an interface. And when you get to cascaded
switches, the DSA ports are also interesting, so should we also have a
netdev for them? But they are equally useless for transferring frames
from the host as the CPU port. This is why i went for debugfs.

 
 Also, in my RFC for 802.1Q support [1], I assume the CPU port to be a tagged
 member of each VLAN. But someone may want to add a VLAN with swp3 and swp4
 only, and another VLAN with swp0, swp1 and the CPU port. Am I correct?

The DSA concept is that switch ports are separate interfaces. So
adding a VLAN to two ports does not automatically bridge those ports
together. You need to add them to a bridge as well before VLAN tagged
frames are bridged between ports.

   Andrew
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 01/22] fjes: Introduce FUJITSU Extended Socket Network Device driver

2015-06-17 Thread Joe Perches
On Thu, 2015-06-18 at 09:49 +0900, Taku Izumi wrote:
 This patch adds the basic code of FUJITSU Extended Socket
 Network Device driver.
[]
 diff --git a/drivers/platform/x86/fjes/fjes_main.c 
 b/drivers/platform/x86/fjes/fjes_main.c
[]
 +static acpi_status fjes_get_acpi_resource(struct acpi_resource *acpi_res,
 + void *data)
 +{
 + struct resource *res = data;
 + struct acpi_resource_address32 *addr;
 + struct acpi_resource_irq *irq;
 +
 + switch (acpi_res-type) {
 + case ACPI_RESOURCE_TYPE_ADDRESS32:
 + addr = acpi_res-data.address32;
 + res[0].start = addr-address.minimum;
 + res[0].end = addr-address.minimum +
 + addr-address.address_length;

Isn't this missing - 1?
end = start + length - 1; ?


 +static int __init fjes_init_module(void)
 +{
 + int result;
 +
 + pr_info(%s - version %s\n,
 + fjes_driver_string, fjes_driver_version);
 + pr_info(%s\n, fjes_copyright);

Maybe emit copyright on the same line as version?


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] fm10k: Report MAC address on driver load

2015-06-17 Thread Alexander Duyck
This change adds the MAC address to the list of values recorded on driver
load.  The MAC address represents the serial number of the unit and allows
us to track the value should a card be replaced in a system.

Signed-off-by: Alexander Duyck alexander.h.du...@redhat.com
---
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c 
b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index df9fda38bdd1..5db41ab3b762 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -1837,7 +1837,7 @@ static int fm10k_probe(struct pci_dev *pdev,
fm10k_ptp_register(interface);
 
/* print bus type/speed/width info */
-   dev_info(pdev-dev, (PCI Express:%s Width: %s Payload: %s)\n,
+   dev_info(pdev-dev, (PCI Express:%s Width: %s Payload: %s) %pM\n,
 (hw-bus.speed == fm10k_bus_speed_8000 ? 8.0GT/s :
  hw-bus.speed == fm10k_bus_speed_5000 ? 5.0GT/s :
  hw-bus.speed == fm10k_bus_speed_2500 ? 2.5GT/s :
@@ -1849,7 +1849,7 @@ static int fm10k_probe(struct pci_dev *pdev,
 (hw-bus.payload == fm10k_bus_payload_128 ? 128B :
  hw-bus.payload == fm10k_bus_payload_256 ? 256B :
  hw-bus.payload == fm10k_bus_payload_512 ? 512B :
- Unknown));
+ Unknown), netdev-dev_addr);
 
/* print warning for non-optimal configurations */
fm10k_slot_warn(interface);

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 00/22] FUJITSU Extended Socket network device driver

2015-06-17 Thread Taku Izumi
This patchset adds the FUJITSU Extended Socket network device driver.
Extended Socket network device is a shared memory based high-speed network 
interface between Extended Partitions of PRIMEQUEST 2000 E2 series.

You can get some information about Extended Partition and Extended
Socket by referring to the following manual.

http://globalsp.ts.fujitsu.com/dmsp/Publications/public/CA92344-0537.pdf
 3.2.1 Extended Partitioning
 3.2.2 Extended Socket


Taku Izumi (22):
  fjes: Introduce FUJITSU Extended Socket Network Device driver
  fjes: Hardware initialization routine
  fjes: Hardware cleanup routine
  fjes: platform_driver's .probe and .remove routine
  fjes: ES information acquisition routine
  fjes: buffer address regist/unregistration routine
  fjes: net_device_ops.ndo_open and .ndo_stop
  fjes: net_device_ops.ndo_start_xmit
  fjes: raise_intr_rxdata_task
  fjes: tx_stall_task
  fjes: NAPI polling function
  fjes: net_device_ops.ndo_get_stats64
  fjes: net_device_ops.ndo_change_mtu
  fjes: net_device_ops.ndo_tx_timeout
  fjes: net_device_ops.ndo_vlan_rx_add/kill_vid
  fjes: interrupt_watch_task
  fjes: force_close_task
  fjes: unshare_watch_task
  fjes: update_zone_task
  fjes: epstop_task
  fjes: handle receive cancellation request interrupt
  fjes: ethtool support

 drivers/platform/x86/Kconfig |7 +
 drivers/platform/x86/Makefile|2 +
 drivers/platform/x86/fjes/Makefile   |   31 +
 drivers/platform/x86/fjes/fjes.h |   79 ++
 drivers/platform/x86/fjes/fjes_ethtool.c |  135 +++
 drivers/platform/x86/fjes/fjes_hw.c  | 1159 +++
 drivers/platform/x86/fjes/fjes_hw.h  |  338 +++
 drivers/platform/x86/fjes/fjes_main.c| 1485 ++
 drivers/platform/x86/fjes/fjes_regs.h|  149 +++
 9 files changed, 3385 insertions(+)
 create mode 100644 drivers/platform/x86/fjes/Makefile
 create mode 100644 drivers/platform/x86/fjes/fjes.h
 create mode 100644 drivers/platform/x86/fjes/fjes_ethtool.c
 create mode 100644 drivers/platform/x86/fjes/fjes_hw.c
 create mode 100644 drivers/platform/x86/fjes/fjes_hw.h
 create mode 100644 drivers/platform/x86/fjes/fjes_main.c
 create mode 100644 drivers/platform/x86/fjes/fjes_regs.h

-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 22/22] fjes: ethtool support

2015-06-17 Thread Stephen Hemminger
On Thu, 18 Jun 2015 09:49:47 +0900
Taku Izumi izumi.t...@jp.fujitsu.com wrote:

 +static void fjes_get_ethtool_stats(struct net_device *netdev,
 + struct ethtool_stats *stats, u64 *data)
 +{
 + struct fjes_adapter *adapter = netdev_priv(netdev);
 + int i;
 + char *p = NULL;

Although harmless, this initialization is unnecessary.


 + for (i = 0; i  ARRAY_SIZE(fjes_gstrings_stats); i++) {
 + p = (char *)adapter + fjes_gstrings_stats[i].stat_offset;
 + data[i] = (fjes_gstrings_stats[i].sizeof_stat == sizeof(u64))
 + ? *(u64 *)p : *(u32 *)p;
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [REGRESSION] NFS is creating a hidden port (left over from xs_bind() )

2015-06-17 Thread Steven Rostedt
On Fri, 12 Jun 2015 11:50:38 -0400
Steven Rostedt rost...@goodmis.org wrote:

 I reverted the following commits:
 
 c627d31ba0696cbd829437af2be2f2dee3546b1e
 9e2b9f37760e129cee053cc7b6e7288acc2a7134
 caf4ccd4e88cf2795c927834bc488c8321437586
 
 And the issue goes away. That is, I watched the port go from
 ESTABLISHED to TIME_WAIT, and then gone, and there's no hidden port.
 
 In fact, I watched the port with my portlist.c module, and it
 disappeared there too when it entered the TIME_WAIT state.
 

I've been running v4.0.5 with the above commits reverted for 5 days
now, and there's still no hidden port appearing.

What's the status on this? Should those commits be reverted or is there
another solution to this bug?

-- Steve
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net 1/2] bridge: multicast: restore router configuration on port link down/up

2015-06-17 Thread Herbert Xu
On Wed, Jun 17, 2015 at 04:28:30AM -0700, Nikolay Aleksandrov wrote:
 From: Satish Ashok sas...@cumulusnetworks.com
 
 When a port goes through a link down/up the multicast router configuration
 is not restored.
 
 Signed-off-by: Satish Ashok sas...@cumulusnetworks.com
 Signed-off-by: Nikolay Aleksandrov niko...@cumulusnetworks.com
 Fixes: 0909e11758bd (bridge: Add multicast_router sysfs entries)

Acked-by: Herbert Xu herb...@gondor.apana.org.au
-- 
Email: Herbert Xu herb...@gondor.apana.org.au
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/22] fjes: Introduce FUJITSU Extended Socket Network Device driver

2015-06-17 Thread Taku Izumi
This patch adds the basic code of FUJITSU Extended Socket
Network Device driver.

When PNP0C02 is found in the ACPI DSDT, it evaluates _STR
to check whether PNP0C02 is for the Extended Socket device driver
and retrieves the ACPI resource information. It then creates a
platform_device.

Signed-off-by: Taku Izumi izumi.t...@jp.fujitsu.com
---
 drivers/platform/x86/Kconfig  |   7 +
 drivers/platform/x86/Makefile |   2 +
 drivers/platform/x86/fjes/Makefile|  31 +
 drivers/platform/x86/fjes/fjes.h  |  34 +
 drivers/platform/x86/fjes/fjes_main.c | 235 ++
 5 files changed, 309 insertions(+)
 create mode 100644 drivers/platform/x86/fjes/Makefile
 create mode 100644 drivers/platform/x86/fjes/fjes.h
 create mode 100644 drivers/platform/x86/fjes/fjes_main.c

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 9752761..353b613 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -884,4 +884,11 @@ config PVPANIC
  a paravirtualized device provided by QEMU; it lets a virtual machine
  (guest) communicate panic events to the host.
 
+config FUJITSU_ES
+   tristate FUJITSU Extended Socket Network Device driver
+   depends on ACPI
+   ---help---
+ This driver provides support for Extended Socket network device on
+ Extended Partitioning of FUJITSU PRIMEQUEST 2000 series.
+
 endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index f82232b..a88516c 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -58,3 +58,5 @@ obj-$(CONFIG_INTEL_SMARTCONNECT)  += intel-smartconnect.o
 
 obj-$(CONFIG_PVPANIC)   += pvpanic.o
 obj-$(CONFIG_ALIENWARE_WMI)+= alienware-wmi.o
+
+obj-$(CONFIG_FUJITSU_ES)+= fjes/
diff --git a/drivers/platform/x86/fjes/Makefile 
b/drivers/platform/x86/fjes/Makefile
new file mode 100644
index 000..98e59cb
--- /dev/null
+++ b/drivers/platform/x86/fjes/Makefile
@@ -0,0 +1,31 @@
+
+#
+# FUJITSU Extended Socket Network Device driver
+# Copyright (c) 2015 FUJITSU LIMITED
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, see http://www.gnu.org/licenses/.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called COPYING.
+#
+
+
+
+#
+# Makefile for the FUJITSU Extended Socket network device driver
+#
+
+obj-$(CONFIG_FUJITSU_ES) += fjes.o
+
+fjes-objs := fjes_main.o
+
diff --git a/drivers/platform/x86/fjes/fjes.h b/drivers/platform/x86/fjes/fjes.h
new file mode 100644
index 000..5586305
--- /dev/null
+++ b/drivers/platform/x86/fjes/fjes.h
@@ -0,0 +1,34 @@
+/*
+ *  FUJITSU Extended Socket Network Device driver
+ *  Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, see http://www.gnu.org/licenses/.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called COPYING.
+ *
+ */
+
+
+#ifndef FJES_H_
+#define FJES_H_
+
+#include linux/acpi.h
+
+#define FJES_ACPI_SYMBOL   Extended Socket
+
+extern char fjes_driver_name[];
+extern char fjes_driver_version[];
+extern u32 fjes_support_mtu[];
+
+#endif /* FJES_H_ */
diff --git a/drivers/platform/x86/fjes/fjes_main.c 
b/drivers/platform/x86/fjes/fjes_main.c
new file mode 100644
index 000..258929a1
--- /dev/null
+++ b/drivers/platform/x86/fjes/fjes_main.c
@@ -0,0 +1,235 @@
+/*
+ *  FUJITSU Extended Socket Network Device driver
+ *  Copyright (c) 2015 FUJITSU LIMITED
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but 

[PATCH next v2] bonding: Display LACP info only to CAP_NET_ADMIN capable user

2015-06-17 Thread Mahesh Bandewar
Actor and Partner details can be accessed via proc-fs, sys-fs
entries or netlink interface. These interfaces are world readable
at this moment. The earlier patch-series made the LACP communication
secure to avoid nuisance attacks from within the same L2 domain, but
it did not prevent an unprivileged user from looking at that information
on the host and performing the same attack.

This patch essentially avoids spitting those entries if the user
in question does not have enough privileges.

Signed-off-by: Mahesh Bandewar mahe...@google.com
---
 drivers/net/bonding/bond_netlink.c |  11 ++--
 drivers/net/bonding/bond_procfs.c  | 101 +++--
 drivers/net/bonding/bond_sysfs.c   |  12 ++---
 3 files changed, 67 insertions(+), 57 deletions(-)

diff --git a/drivers/net/bonding/bond_netlink.c 
b/drivers/net/bonding/bond_netlink.c
index 5580fcde738f..3fd3aa4b145e 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -600,18 +600,23 @@ static int bond_fill_info(struct sk_buff *skb,
 
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info info;
+   u8 zero_mac[ETH_ALEN];
 
+   eth_zero_addr(zero_mac);
if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO,
-   bond-params.ad_actor_sys_prio))
+   capable(CAP_NET_ADMIN) ?
+   bond-params.ad_actor_sys_prio : 0))
goto nla_put_failure;
 
if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY,
-   bond-params.ad_user_port_key))
+   capable(CAP_NET_ADMIN) ?
+   bond-params.ad_user_port_key : 0))
goto nla_put_failure;
 
if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM,
sizeof(bond-params.ad_actor_system),
-   bond-params.ad_actor_system))
+   capable(CAP_NET_ADMIN) ?
+   bond-params.ad_actor_system : zero_mac))
goto nla_put_failure;
 
if (!bond_3ad_get_active_agg_info(bond, info)) {
diff --git a/drivers/net/bonding/bond_procfs.c 
b/drivers/net/bonding/bond_procfs.c
index e7f3047a26df..f514fe5e80a5 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -135,27 +135,30 @@ static void bond_info_show_master(struct seq_file *seq)
  bond-params.ad_select);
seq_printf(seq, Aggregator selection policy (ad_select): %s\n,
   optval-string);
-   seq_printf(seq, System priority: %d\n,
-  BOND_AD_INFO(bond).system.sys_priority);
-   seq_printf(seq, System MAC address: %pM\n,
-  BOND_AD_INFO(bond).system.sys_mac_addr);
-
-   if (__bond_3ad_get_active_agg_info(bond, ad_info)) {
-   seq_printf(seq, bond %s has no active aggregator\n,
-  bond-dev-name);
-   } else {
-   seq_printf(seq, Active Aggregator Info:\n);
-
-   seq_printf(seq, \tAggregator ID: %d\n,
-  ad_info.aggregator_id);
-   seq_printf(seq, \tNumber of ports: %d\n,
-  ad_info.ports);
-   seq_printf(seq, \tActor Key: %d\n,
-  ad_info.actor_key);
-   seq_printf(seq, \tPartner Key: %d\n,
-  ad_info.partner_key);
-   seq_printf(seq, \tPartner Mac Address: %pM\n,
-  ad_info.partner_system);
+   if (capable(CAP_NET_ADMIN)) {
+   seq_printf(seq, System priority: %d\n,
+  BOND_AD_INFO(bond).system.sys_priority);
+   seq_printf(seq, System MAC address: %pM\n,
+  BOND_AD_INFO(bond).system.sys_mac_addr);
+
+   if (__bond_3ad_get_active_agg_info(bond, ad_info)) {
+   seq_printf(seq,
+  bond %s has no active aggregator\n,
+  bond-dev-name);
+   } else {
+   seq_printf(seq, Active Aggregator Info:\n);
+
+   seq_printf(seq, \tAggregator ID: %d\n,
+  ad_info.aggregator_id);
+   seq_printf(seq, \tNumber of ports: %d\n,
+  ad_info.ports);
+   seq_printf(seq, \tActor Key: %d\n,
+  ad_info.actor_key);
+   seq_printf(seq, 

Re: [PATCH 02/22] fjes: Hardware initialization routine

2015-06-17 Thread Joe Perches
On Thu, 2015-06-18 at 09:49 +0900, Taku Izumi wrote:
 This patch adds hardware initialization routine to be
 invoked at driver's .probe routine.

Trivial notes:

Please run all your patches through scripts/checkpatch.pl and
fix whatever messages it emits as you think appropriate.

 diff --git a/drivers/platform/x86/fjes/fjes_hw.c 
 b/drivers/platform/x86/fjes/fjes_hw.c
[]
 +/* supported MTU list */
 +u32 fjes_support_mtu[] = {
 + FJES_MTU_DEFINE(8 * 1024),
 + FJES_MTU_DEFINE(16 * 1024),
 + FJES_MTU_DEFINE(32 * 1024),
 + FJES_MTU_DEFINE(64 * 1024),
 + 0
 +};

Maybe these should be const?

 +static u8 *fjes_hw_iomap(struct fjes_hw *hw)
 +{
 + u8 *base;
 +
 + if (!request_mem_region(hw-hw_res.start, hw-hw_res.size,
 + fjes_driver_name)) {
 + pr_err(request_mem_region failed);

Please make sure all pr_level logging messages end with a \n
so that interleaving by other process threads can't happen.

 +static int fjes_hw_setup(struct fjes_hw *hw)
 +{
[]
 + mem_size = sizeof(struct ep_share_mem_info) * (hw-max_epid);
 + buf = kzalloc(mem_size, GFP_KERNEL);

kcalloc?

[]

 + memset((void *)param, 0, sizeof(param));

Unnecessary cast

 diff --git a/drivers/platform/x86/fjes/fjes_hw.h 
 b/drivers/platform/x86/fjes/fjes_hw.h
[]
 +#define FJES_DEVICE_RESET_TIMEOUT  ((17 + 1) * 3) /* sec */

54 second timeout for a device reset?

 +/* Frame  MTU */
 +struct esmem_frame_t {
 + __le32 frame_size;
 + u8 frame_data[];
 +};

Using a _t suffix for a struct type can be confusing.

[]
 + struct _ep_buffer_info_common_t {
 + u32 version;
 + } common;
 +
 + struct _ep_buffer_info_v1_t {

 diff --git a/drivers/platform/x86/fjes/fjes_regs.h 
 b/drivers/platform/x86/fjes/fjes_regs.h

 +/* Interrupt Control registers */
 +#define XSCT_IMS0x0084  /* Interrupt mas set */

mask

 +#define XSCT_IMC0x0088  /* Interrupt mask clear */


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Intel-wired-lan] [PATCH v6 3/3] ixgbe, ixgbevf: Add new mbox API to enable MC promiscuous mode

2015-06-17 Thread Alexander Duyck

On 06/17/2015 04:45 AM, Hiroshi Shimamoto wrote:

From: Hiroshi Shimamoto h-shimam...@ct.jp.nec.com

The limitation of the number of multicast address for VF is not enough
for the large scale server with SR-IOV feature.
IPv6 requires the multicast MAC address for each IP address to handle
the Neighbor Solicitation message.
We couldn't assign over 30 IPv6 addresses to a single VF interface.

The easy way to solve this is enabling multicast promiscuous mode.
It is good to have a functionality to enable multicast promiscuous mode
for each VF from VF driver.

This patch introduces the new mbox API, IXGBE_VF_SET_MC_PROMISC, to
enable/disable multicast promiscuous mode in VF. If multicast
promiscuous mode is enabled the VF can receive all multicast packets.

With this patch, the ixgbevf driver automatically enables multicast
promiscuous mode when the number of multicast addresses exceeds 30,
if possible.

The PF only allows a VF to enable multicast promiscuous mode if the VF is trusted.
If the VF is not trusted, the PF returns an error to the VF and the VF falls back to
the previous behavior, in which only 30 multicast addresses are registered in the filter.

Signed-off-by: Hiroshi Shimamoto h-shimam...@ct.jp.nec.com
CC: Choi, Sy Jong sy.jong.c...@intel.com
---
  drivers/net/ethernet/intel/ixgbe/ixgbe.h  |  1 +
  drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h  |  2 +
  drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c| 55 +++
  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  3 ++
  drivers/net/ethernet/intel/ixgbevf/mbx.h  |  2 +
  drivers/net/ethernet/intel/ixgbevf/vf.c   | 49 +++-
  drivers/net/ethernet/intel/ixgbevf/vf.h   |  1 +
  7 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 7f76c12..054db64 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -146,6 +146,7 @@ struct vf_data_storage {
u16 vlans_enabled;
bool clear_to_send;
bool pf_set_mac;
+   bool mc_promisc;
u16 pf_vlan; /* When set, guest VLAN config not allowed. */
u16 pf_qos;
u16 tx_rate;


Instead of casting this as a bool I think it might be better served as 
an enum.  You basically have 4 levels you could set:

DISABLED      No traffic allowed, Rx disabled, PF only
NONE          only L2 exact match addresses or Flow Director enabled
MULTI         BAM & ROMPE set
ALLMULTI      BAM, ROMPE, & MPE set
PROMISC       BAM, ROMPE, MPE, & UPE (available on x540)
VLAN_PROMISC  BAM, ROMPE, MPE, UPE, & VPE (available on x540)

That just leaves AUPE and ROPE, which are kind of special cases.  AUPE
should be set if a port VLAN is not assigned by the PF, and as far as
ROPE goes, it could be thought of as a poor man's promiscuous mode, so it
might be useful for 82599 to possibly try to put together some sort of
promiscuous mode, though I cannot say for certain.


The idea is to make use of the enum to enable higher or lower levels of 
escalation.  You could then limit a non-trusted VF to MULTI for any 
requests of ALLMULTI, PROMISC, or VLAN_PROMISC and if the VF is trusted 
it would have access to ALLMULTI on 82599, and potentially PROMISC or 
VLAN_PROMISC on x540 and newer.


It hadn't occurred to me until just now that the NONE option might be 
desirable to some as well since it is possible that somebody would 
rather use flow director rules to send traffic to a VF rather than have 
it receive broadcast or multicast traffic.  By doing this we enable that 
as a possible use case.  It could all be controlled through the 
IFF_BROADCAST, IFF_MULTICAST, IFF_ALLMULTI, and IFF_PROMISC flags in 
set_rx_mode.


We did something like this for fm10k as it was a requirement of the 
Switch API.  You could probably do something similar for the 
ixgbe/ixgbevf mailbox interface as it seems like it might be a better 
fit than adding a new message to cover one specific case.



diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index b1e4703..703d40b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -102,6 +102,8 @@ enum ixgbe_pfvf_api_rev {
  #define IXGBE_VF_GET_RETA 0x0a/* VF request for RETA */
  #define IXGBE_VF_GET_RSS_KEY  0x0b/* get RSS key */

+#define IXGBE_VF_SET_MC_PROMISC0x0c/* VF requests MC promiscuous */
+
  /* length of permanent address message returned from PF */
  #define IXGBE_VF_PERMADDR_MSG_LEN 4
  /* word in permanent address message with the current multicast type */


You might just want to refer to this as SET_XCAST_MODE since that is 
essentially what this command is doing.



diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 826f88e..925d9c6 100644
--- 

Re: [PATCH 08/11] IB/cma: Add net_dev and private data checks to RDMA CM

2015-06-17 Thread Jason Gunthorpe
On Tue, Jun 16, 2015 at 08:26:26AM +0300, Haggai Eran wrote:
 On 15/06/2015 20:08, Jason Gunthorpe wrote:
  On Mon, Jun 15, 2015 at 11:47:13AM +0300, Haggai Eran wrote:
  Instead of relying on the ib_cm module to check an incoming CM request's
  private data header, add these checks to the RDMA CM module. This allows a
  following patch to clean up the ib_cm interface and remove the code that
  looks into the private headers. It will also allow supporting namespaces in
  RDMA CM by making these checks namespace aware later on.
  
  I was expecting one of these patches to flow the net_device from here:
  
  +static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
  +const struct cma_req_info *req)
  +{
  
  Down through cma_req_handler and cma_new_conn_id so that we get rid of
  the cma_translate_addr on the ingress side.
  
  Having the ingress side use one ingress net_device for all processing
  seems very important to me...
 
 Is it really very important? I thought the bound_dev_if of a passive
 connection id is only used by the netlink statistics mechanism.

I mean 'very important' in the sense it makes the RDMA-CM *make
logical sense*, not so much in the 'can user space tell'.

So yes, cleaning this seems very important to establish that logical
narrative of how the packet flows through this code.

Plus, there is an init_net in the cma_translate_addr path that needs to
be addressed - so purging cma_translate_addr is a great way to handle
that. That would leave only the call in rdma_bind_addr, and for bind,
the process net namespace is the correct thing to use.

Jason
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next 43/43] netfilter: Skip unnecessary calls to synchronize_net

2015-06-17 Thread Patrick McHardy
On 17.06, Eric W. Biederman wrote:
 From: Eric W Biederman ebied...@xmission.com
 
 Signed-off-by: Eric W. Biederman ebied...@xmission.com
 ---
  net/netfilter/core.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/net/netfilter/core.c b/net/netfilter/core.c
 index 95456c09cf69..1b4eadc9c030 100644
 --- a/net/netfilter/core.c
 +++ b/net/netfilter/core.c
 @@ -134,7 +134,9 @@ void nf_unregister_hook(struct net *net, const struct 
 nf_hook_ops *reg)
  #ifdef HAVE_JUMP_LABEL
   static_key_slow_dec(nf_hooks_needed[reg-pf][reg-hooknum]);
  #endif
 - synchronize_net();
 + /* Don't wait if there are no packets in flight */
 + if (net-loopback_dev)
 + synchronize_net();

I don't get this, could you please explain why there wouldn't be any packets
in flight if there is no loopback_dev?

   kfree(elem);
  }
  EXPORT_SYMBOL(nf_unregister_hook);
 -- 
 2.2.1
 
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/3] Fix Ethernet jumbo frames support for Armada 370 and 38x

2015-06-17 Thread Simon Guinot
On Wed, Jun 17, 2015 at 05:24:58PM +0200, Thomas Petazzoni wrote:
 Dear Simon Guinot,
 
 On Wed, 17 Jun 2015 15:19:19 +0200, Simon Guinot wrote:
 
  This patch series fixes the Ethernet jumbo frames support for the SoCs
  Armada 370, 380 and 385. Unlike Armada XP, the Ethernet controller for
  these SoCs doesn't support TCP/IP checksumming with a frame size larger
  than 1600 bytes.
  
  These patches should be applied to the -stable kernels 3.8 and onwards.
 
 You should add a Fixes: tag to each commit to indicate which commit is
 being fixed by your patches.
 
 Also, I was a bit surprised by your statement that Armada 38x is also
 affected by the problem, since Armada 38x is more recent than Armada
 XP. but indeed, according to the Armada 38x datasheet:
 
   IPv4 and TCP/UDP over IPv4/IPv6 checksum generation on transmit
   frames for standard Ethernet packet size
 
 While the Armada XP datasheet says:
 
   Long frames transmission (including jumbo frames), with
   IPv4/v6/TCP/UDP checksum generation
 
 So it seems like you're right about this!

At first, I thought this was an error in the Armada 38x datasheet (maybe
a sloppy copy/paste). Therefore I have checked on a DB-88F6820-GP
board and I can confirm that the Armada 38x is also affected.

Simon


signature.asc
Description: Digital signature


Re: [PATCH V3 0/2] pci: Provide a flag to access VPD through function 0

2015-06-17 Thread Alexander Duyck

On 06/17/2015 09:29 AM, Rustad, Mark D wrote:

+ Alex


On Jun 5, 2015, at 2:59 PM, Rustad, Mark D mark.d.rus...@intel.com wrote:


On Jun 3, 2015, at 11:46 AM, Mark D Rustad mark.d.rus...@intel.com wrote:

Many multi-function devices provide shared registers in extended
config space for accessing VPD. The behavior of these registers
means that the state must be tracked and access locked correctly
for accesses not to hang or worse. One way to meet these needs is
to always perform the accesses through function 0, thereby using
the state tracking and mutex that already exists.

To provide this behavior, add a dev_flags bit to indicate that this
should be done. This bit can then be set for any non-zero function
that needs to redirect such VPD access to function 0. Do not set
this bit on the zero function or there will be an infinite recursion.

The second patch uses this new flag to invoke this behavior on all
multi-function Intel Ethernet devices.

Signed-off-by: Mark Rustad mark.d.rus...@intel.com

---
Changes in V2:
- Corrected a spelling error in a log message
- Added checks to see that the referenced function 0 is reasonable
Changes in V3:
- Don't leak a device reference
- Check that function 0 has VPD
- Make a helper for the function 0 checks
- Moved a multifunction check to the quirk patch

So does this series look acceptable now? I think I addressed the issues that 
Alex raised. Can these also be considered for -stable?

More than a week has passed without any comment. Is this going to be accepted 
or is there still an issue?


Yeah, this looks like it has addressed most of the corner cases so I am 
good with it.


Acked-by: Alexander Duyck alexander.h.du...@redhat.com
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] ipv4: include NLM_F_APPEND flag in append route notifications

2015-06-17 Thread Alexander Duyck

On 06/17/2015 09:20 AM, roopa wrote:

On 6/17/15, 8:35 AM, Alexander Duyck wrote:



@@ -1189,8 +1190,9 @@ int fib_table_insert(struct fib_table *tb, struct
fib_config *cfg)
 fib_release_info(fi_drop);
 if (state  FA_S_ACCESSED)
rt_cache_flush(cfg-fc_nlinfo.nl_net);
+   nlflags |= NLM_F_REPLACE;
 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, 
plen,
-   tb-tb_id, cfg-fc_nlinfo, 
NLM_F_REPLACE);

+   tb-tb_id, cfg-fc_nlinfo, nlflags);

 goto succeeded;



Why even bother modifying this part?  Is this actually needed at all, 
are there some other flags you plan to drop into nlflags as well that 
would be passed as a part of this message?


agreed, for the same reason my initial patch did not touch this part. 
Nope, no other flags. I was trying to meet scotts concerns.



@@ -1201,7 +1203,9 @@ int fib_table_insert(struct fib_table *tb, struct
fib_config *cfg)
 if (fa_match)
 goto out;

-   if (!(cfg-fc_nlflags  NLM_F_APPEND))
+   if (cfg-fc_nlflags  NLM_F_APPEND)
+   nlflags |= NLM_F_APPEND;
+   else
 fa = fa_first;
 }
 err = -ENOENT;


I'm not sure I see the point of using the |=.   Why not just use a = 
and save yourself an instruction or two since you don't really need 
the OR operator in this case.



ack,

I would prefer keeping my initial patch which was pretty non-intrusive.


I'd say go with something closer to the original patch, but flip the 
logic like you have here, and lose the |= in favor of an = since you 
are either sending a message with 0 or NLM_F_APPEND.


Anyway that is just my $.02.

- Alex
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 1/3] net: mvneta: introduce compatible string marvell, armada-xp-neta

2015-06-17 Thread Thomas Petazzoni
Jason,

On Wed, 17 Jun 2015 17:01:12 +, Jason Cooper wrote:

 I disagree with this.  We can't predict what inconsistencies we'll discover in
 the future.  We should only assign new compatible strings based on known IP
 variations when we discover them.  This seems fraught with demons since we
 can't predict the scope of affected IP blocks (some steppings of one SoC, 
 three
 SoCs plus two steppings of a fourth, etc)
 
 imho, the 'future-proofing' lies in being specific as to the naming of the
 compatible strings against known hardware variations at the time.

Except that this clearly doesn't work, and the case raised by Simon is
a perfect illustration of why planning ahead is beneficial. We already
had the issue several times on mvebu platforms, so it should really
become the rule to have one compatible string specific to the actual
SoC in the list of compatible strings.

Not doing so requires breaking DT backward compatibility more often, so
wanting DT backward compatibility and not wanting to plan ahead are a
bit antagonistic. But I personally don't care much about DT backward
compatibility, and I've explained numerous times why, so in the end I
don't really care much.

Best regards,

Thomas
-- 
Thomas Petazzoni, CTO, Free Electrons
Embedded Linux, Kernel and Android engineering
http://free-electrons.com
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net-next] switchdev: fdb filter_dev is always NULL for self (device), so remove check

2015-06-17 Thread Samudrala, Sridhar



On 6/17/2015 4:08 PM, sfel...@gmail.com wrote:

From: Scott Feldman sfel...@gmail.com

Remove the filter_dev check when dumping fdb entries, otherwise dump
returns empty list.  filter_dev is always passed as NULL when dumping fdbs
on SELF.  We want the fdbs installed on the device to be listed in the
dump.

Signed-off-by: Scott Feldman sfel...@gmail.com

Acked-by: Sridhar Samudrala sridhar.samudr...@intel.com


Fixes: 45d4122c (switchdev: add support for fdb add/del/dump via switchdev_port_obj 
ops)
---
  net/switchdev/switchdev.c |6 --
  1 file changed, 6 deletions(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5d0f8e..7dda437 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -657,7 +657,6 @@ struct switchdev_fdb_dump {
struct switchdev_obj obj;
struct sk_buff *skb;
struct netlink_callback *cb;
-   struct net_device *filter_dev;
int idx;
  };
  
@@ -670,14 +669,10 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev,

u32 seq = dump-cb-nlh-nlmsg_seq;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
-   struct net_device *master = netdev_master_upper_dev_get(dev);
  
  	if (dump-idx  dump-cb-args[0])

goto skip;
  
-	if (master  dump-filter_dev != master)

-   goto skip;
-
nlh = nlmsg_put(dump-skb, portid, seq, RTM_NEWNEIGH,
sizeof(*ndm), NLM_F_MULTI);
if (!nlh)
@@ -731,7 +726,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct 
netlink_callback *cb,
},
.skb = skb,
.cb = cb,
-   .filter_dev = filter_dev,
.idx = idx,
};
int err;


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 2/3] net/xen-netback: Remove unused code in xenvif_rx_action

2015-06-17 Thread Julien Grall
The variables old_req_cons and ring_slots_used are assigned but never
used since commit 1650d5455bd2dc6b5ee134bd6fc1a3236c266b5b ("xen-netback:
always fully coalesce guest Rx packets").

Signed-off-by: Julien Grall julien.gr...@citrix.com
Acked-by: Wei Liu wei.l...@citrix.com
Cc: Ian Campbell ian.campb...@citrix.com
Cc: netdev@vger.kernel.org

---
Changes in v2:
- Add Wei's Acked-by
---
 drivers/net/xen-netback/netback.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index 0d25943..ba3ae30 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -515,14 +515,9 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
 
while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
(skb = xenvif_rx_dequeue(queue)) != NULL) {
-   RING_IDX old_req_cons;
-   RING_IDX ring_slots_used;
-
queue-last_rx_time = jiffies;
 
-   old_req_cons = queue-rx.req_cons;
XENVIF_RX_CB(skb)-meta_slots_used = xenvif_gop_skb(skb, npo, 
queue);
-   ring_slots_used = queue-rx.req_cons - old_req_cons;
 
__skb_queue_tail(rxq, skb);
}
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/3] net/xen-netback: Don't mix hexa and decimal with 0x in the printf format

2015-06-17 Thread Julien Grall
Prepend 0x to all %x in order to avoid confusion when reading a log that
also contains decimal values.

Also replace some of the hexadecimal prints with decimal ones to make the
format consistent with netfront.

Signed-off-by: Julien Grall julien.gr...@citrix.com
Acked-by: Ian Campbell ian.campb...@citrix.com
Cc: Wei Liu wei.l...@citrix.com
Cc: netdev@vger.kernel.org

---
Changes in v5:
- Fix commit message
- Add Ian's ack.

Changes in v4:
- Patch added
---
 drivers/net/xen-netback/netback.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index ba3ae30..11bd9d8 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -748,7 +748,7 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
slots++;
 
if (unlikely((txp-offset + txp-size)  PAGE_SIZE)) {
-   netdev_err(queue-vif-dev, Cross page boundary, 
txp-offset: %x, size: %u\n,
+   netdev_err(queue-vif-dev, Cross page boundary, 
txp-offset: %u, size: %u\n,
 txp-offset, txp-size);
xenvif_fatal_tx_err(queue-vif);
return -EINVAL;
@@ -874,7 +874,7 @@ static inline void xenvif_grant_handle_set(struct 
xenvif_queue *queue,
if (unlikely(queue-grant_tx_handle[pending_idx] !=
 NETBACK_INVALID_HANDLE)) {
netdev_err(queue-vif-dev,
-  Trying to overwrite active handle! pending_idx: 
%x\n,
+  Trying to overwrite active handle! pending_idx: 
0x%x\n,
   pending_idx);
BUG();
}
@@ -887,7 +887,7 @@ static inline void xenvif_grant_handle_reset(struct 
xenvif_queue *queue,
if (unlikely(queue-grant_tx_handle[pending_idx] ==
 NETBACK_INVALID_HANDLE)) {
netdev_err(queue-vif-dev,
-  Trying to unmap invalid handle! pending_idx: %x\n,
+  Trying to unmap invalid handle! pending_idx: 
0x%x\n,
   pending_idx);
BUG();
}
@@ -1243,7 +1243,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue 
*queue,
/* No crossing a page as the payload mustn't fragment. */
if (unlikely((txreq.offset + txreq.size)  PAGE_SIZE)) {
netdev_err(queue-vif-dev,
-  txreq.offset: %x, size: %u, end: %lu\n,
+  txreq.offset: %u, size: %u, end: %lu\n,
   txreq.offset, txreq.size,
   (unsigned long)(txreq.offset~PAGE_MASK) + 
txreq.size);
xenvif_fatal_tx_err(queue-vif);
@@ -1593,12 +1593,12 @@ static inline void xenvif_tx_dealloc_action(struct 
xenvif_queue *queue)
queue-pages_to_unmap,
gop - queue-tx_unmap_ops);
if (ret) {
-   netdev_err(queue-vif-dev, Unmap fail: nr_ops %tx ret 
%d\n,
+   netdev_err(queue-vif-dev, Unmap fail: nr_ops %tu ret 
%d\n,
   gop - queue-tx_unmap_ops, ret);
for (i = 0; i  gop - queue-tx_unmap_ops; ++i) {
if (gop[i].status != GNTST_okay)
netdev_err(queue-vif-dev,
-   host_addr: %llx handle: %x 
status: %d\n,
+   host_addr: 0x%llx handle: 
0x%x status: %d\n,
   gop[i].host_addr,
   gop[i].handle,
   gop[i].status);
@@ -1731,7 +1731,7 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 
pending_idx)
queue-mmap_pages[pending_idx], 1);
if (ret) {
netdev_err(queue-vif-dev,
-  Unmap fail: ret: %d pending_idx: %d host_addr: %llx 
handle: %x status: %d\n,
+  Unmap fail: ret: %d pending_idx: %d host_addr: %llx 
handle: 0x%x status: %d\n,
   ret,
   pending_idx,
   tx_unmap_op.host_addr,
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/3] net/xen-netfront: Correct printf format in xennet_get_responses

2015-06-17 Thread Julien Grall
rx->status is an int16_t, so print it using %d rather than %u in order to
have a meaningful value when the field is negative.

Also use %u rather than %x for rx->offset.

Signed-off-by: Julien Grall julien.gr...@citrix.com
Reviewed-by: David Vrabel david.vra...@citrix.com
Cc: Konrad Rzeszutek Wilk konrad.w...@oracle.com
Cc: Boris Ostrovsky boris.ostrov...@oracle.com
Cc: netdev@vger.kernel.org

---
Changes in v4:
- Use %u for the rx-offset because offset is unsigned

Changes in v3:
- Use %d for the rx-offset too.

Changes in v2:
- Add David's Reviewed-by
---
 drivers/net/xen-netfront.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index e031c94..281720f 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -733,7 +733,7 @@ static int xennet_get_responses(struct netfront_queue 
*queue,
if (unlikely(rx-status  0 ||
 rx-offset + rx-status  PAGE_SIZE)) {
if (net_ratelimit())
-   dev_warn(dev, rx-offset: %x, size: %u\n,
+   dev_warn(dev, rx-offset: %u, size: %d\n,
 rx-offset, rx-status);
xennet_move_rx_slot(queue, skb, ref);
err = -EINVAL;
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net 1/2] bridge: multicast: restore router configuration on port link down/up

2015-06-17 Thread Nikolay Aleksandrov
From: Satish Ashok sas...@cumulusnetworks.com

When a port goes through a link down/up the multicast router configuration
is not restored.

Signed-off-by: Satish Ashok sas...@cumulusnetworks.com
Signed-off-by: Nikolay Aleksandrov niko...@cumulusnetworks.com
Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries")
---
 net/bridge/br_multicast.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ff667e18b2d6..761fc733bf6d 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,6 +37,8 @@
 
 static void br_multicast_start_querier(struct net_bridge *br,
   struct bridge_mcast_own_query *query);
+static void br_multicast_add_router(struct net_bridge *br,
+   struct net_bridge_port *port);
 unsigned int br_mdb_rehash_seq;
 
 static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -936,6 +938,8 @@ void br_multicast_enable_port(struct net_bridge_port *port)
 #if IS_ENABLED(CONFIG_IPV6)
br_multicast_enable(port-ip6_own_query);
 #endif
+   if (port-multicast_router == 2  hlist_unhashed(port-rlist))
+   br_multicast_add_router(br, port);
 
 out:
spin_unlock(br-multicast_lock);
-- 
2.4.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net 0/2] bridge: multicast behaviour fixes

2015-06-17 Thread Nikolay Aleksandrov
Hi,
Patch 01 fixes a problem where, when a router is configured and a port
goes through a link down/up, the router configuration was not restored.
Patch 02 starts the multicast querier when user-space STP is in use and
a port goes to forwarding state.
These are behaviour fixes, so if you think they are more appropriate for
net-next, feel free to apply them there; I've run them with both net and
net-next. I've also provided Fixes tags, but since these are behaviour
changes, the tags point at the initial implementations of these functions.

Best regards,
 Nikolay Aleksandrov

Satish Ashok (2):
  bridge: multicast: restore router configuration on port link down/up
  bridge: multicast: start querier timer when running user-space stp

 net/bridge/br_multicast.c | 4 
 net/bridge/br_stp.c   | 3 +++
 2 files changed, 7 insertions(+)

-- 
2.4.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH net 2/2] bridge: multicast: start querier timer when running user-space stp

2015-06-17 Thread Nikolay Aleksandrov
From: Satish Ashok sas...@cumulusnetworks.com

When STP is running in user-space and querier is configured, the
querier timer is not started when a port goes to forwarding state.

Signed-off-by: Satish Ashok sas...@cumulusnetworks.com
Signed-off-by: Nikolay Aleksandrov niko...@cumulusnetworks.com
Fixes: eb1d16414339 ("bridge: Add core IGMP snooping support")
---
 net/bridge/br_stp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index fb3ebe615513..1e2f2f1ff6b0 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -456,6 +456,9 @@ void br_port_state_selection(struct net_bridge *br)
p-topology_change_ack = 0;
br_make_blocking(p);
}
+   } else if (br-stp_enabled == BR_USER_STP 
+  p-state == BR_STATE_FORWARDING) {
+   br_multicast_enable_port(p);
}
 
if (p-state == BR_STATE_FORWARDING)
-- 
2.4.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] net: via/Kconfig: replace USE_OF with OF_???

2015-06-17 Thread Antonio Borneo
USE_OF is used as an intermediate Kconfig option by a few
architectures (ARM, MIPS, Xtensa).
Replace instances of USE_OF outside of the arch folders
with the proper OF_??? options.

Signed-off-by: Antonio Borneo borneo.anto...@gmail.com
---

 drivers/net/ethernet/via/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig
index f66ddae..ad7f512 100644
--- a/drivers/net/ethernet/via/Kconfig
+++ b/drivers/net/ethernet/via/Kconfig
@@ -19,7 +19,7 @@ if NET_VENDOR_VIA
 
 config VIA_RHINE
tristate VIA Rhine support
-   depends on (PCI || USE_OF)
+   depends on (PCI || OF_IRQ)
select CRC32
select MII
---help---
@@ -43,7 +43,7 @@ config VIA_RHINE_MMIO
 
 config VIA_VELOCITY
tristate VIA Velocity support
-   depends on (PCI || USE_OF)
+   depends on (PCI || (OF_ADDRESS  OF_IRQ))
select CRC32
select CRC_CCITT
select MII
-- 
2.4.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] clocksource: Kconfig: replace USE_OF with OF

2015-06-17 Thread Antonio Borneo
USE_OF is used as an intermediate Kconfig option by a few
architectures (ARM, MIPS, Xtensa); in all these cases it
implies setting the option OF too.
Replace the only instance of USE_OF in clocksource
with OF.

Signed-off-by: Antonio Borneo borneo.anto...@gmail.com
---

 drivers/clocksource/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 51d7865f..d7410c2 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -254,7 +254,7 @@ config CLKSRC_MIPS_GIC
 
 config CLKSRC_PXA
def_bool y if ARCH_PXA || ARCH_SA1100
-   select CLKSRC_OF if USE_OF
+   select CLKSRC_OF if OF
help
  This enables OST0 support available on PXA and SA-11x0
  platforms.
-- 
2.4.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] net: via-rhine: remove unneeded include file

2015-06-17 Thread Antonio Borneo
Signed-off-by: Antonio Borneo borneo.anto...@gmail.com
---

 drivers/net/ethernet/via/via-rhine.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/via/via-rhine.c 
b/drivers/net/ethernet/via/via-rhine.c
index de28504..a1aaf3a 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -96,7 +96,6 @@ static const int multicast_filter_limit = 32;
 #include linux/ioport.h
 #include linux/interrupt.h
 #include linux/pci.h
-#include linux/of_address.h
 #include linux/of_device.h
 #include linux/of_irq.h
 #include linux/platform_device.h
-- 
2.4.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] replace USE_OF in drivers

2015-06-17 Thread Antonio Borneo
The Kconfig option USE_OF is used as an intermediate option by a few
architectures (ARM, MIPS, Xtensa); in all these cases USE_OF implies
setting the option OF and other OF_??? options.

Outside the arch folders, USE_OF is used in only two places.
In both places, USE_OF can be replaced by OF or OF_???.
This change reduces the scope of USE_OF and makes the drivers
(actually only the VIA network ones) available to other architectures
that don't use USE_OF.

The two patches of this series can be applied independently and
can be taken separately in the respective tree.

Antonio Borneo (2):
  clocksource: Kconfig: replace USE_OF with OF
  net: via/Kconfig: replace USE_OF with OF_???

 drivers/clocksource/Kconfig  | 2 +-
 drivers/net/ethernet/via/Kconfig | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

-- 
2.4.3

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v6 2/3] ixgbe: Add new ndo to trust VF

2015-06-17 Thread Hiroshi Shimamoto
From: Hiroshi Shimamoto h-shimam...@ct.jp.nec.com

Implements the new netdev op to trust VF in ixgbe.

The administrator can turn the VF trusted setting on and off with an ip
command that supports the trust message.
 # ip link set dev eth0 vf 1 trust on
or
 # ip link set dev eth0 vf 1 trust off

Send a ping to reset the VF when its trust status changes.
The VF driver will reconfigure its features on reset.

Signed-off-by: Hiroshi Shimamoto h-shimam...@ct.jp.nec.com
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h   |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 45 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h |  1 +
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 8830c0f..7f76c12 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -152,6 +152,7 @@ struct vf_data_storage {
u16 vlan_count;
u8 spoofchk_enabled;
bool rss_query_enabled;
+   u8 trusted;
unsigned int vf_api;
 };
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5f1b06a..376b49b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8349,6 +8349,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_set_vf_rate= ixgbe_ndo_set_vf_bw,
.ndo_set_vf_spoofchk= ixgbe_ndo_set_vf_spoofchk,
.ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en,
+   .ndo_set_vf_trust   = ixgbe_ndo_set_vf_trust,
.ndo_get_vf_config  = ixgbe_ndo_get_vf_config,
.ndo_get_stats64= ixgbe_get_stats64,
 #ifdef CONFIG_IXGBE_DCB
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 1d17b58..826f88e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -116,6 +116,9 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter 
*adapter)
 * we want to disable the querying by default.
 */
adapter-vfinfo[i].rss_query_enabled = 0;
+
+   /* Untrust all VFs */
+   adapter-vfinfo[i].trusted = false;
}
 
return 0;
@@ -1124,18 +1127,23 @@ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter)
IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), 0);
 }
 
-void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter)
+static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
 {
struct ixgbe_hw *hw = adapter-hw;
u32 ping;
+
+   ping = IXGBE_PF_CONTROL_MSG;
+   if (adapter-vfinfo[vf].clear_to_send)
+   ping |= IXGBE_VT_MSGTYPE_CTS;
+   ixgbe_write_mbx(hw, ping, 1, vf);
+}
+
+void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter)
+{
int i;
 
-   for (i = 0 ; i  adapter-num_vfs; i++) {
-   ping = IXGBE_PF_CONTROL_MSG;
-   if (adapter-vfinfo[i].clear_to_send)
-   ping |= IXGBE_VT_MSGTYPE_CTS;
-   ixgbe_write_mbx(hw, ping, 1, i);
-   }
+   for (i = 0 ; i  adapter-num_vfs; i++)
+   ixgbe_ping_vf(adapter, i);
 }
 
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
@@ -1416,6 +1424,28 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device 
*netdev, int vf,
return 0;
 }
 
+int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+   struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+   if (vf = adapter-num_vfs)
+   return -EINVAL;
+
+   /* nothing to do */
+   if (adapter-vfinfo[vf].trusted == setting)
+   return 0;
+
+   adapter-vfinfo[vf].trusted = setting;
+
+   /* reset VF to reconfigure features */
+   adapter-vfinfo[vf].clear_to_send = false;
+   ixgbe_ping_vf(adapter, vf);
+
+   e_info(drv, VF %u is %strusted\n, vf, setting ?  : not );
+
+   return 0;
+}
+
 int ixgbe_ndo_get_vf_config(struct net_device *netdev,
int vf, struct ifla_vf_info *ivi)
 {
@@ -1430,5 +1460,6 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev,
ivi-qos = adapter-vfinfo[vf].pf_qos;
ivi-spoofchk = adapter-vfinfo[vf].spoofchk_enabled;
ivi-rss_query_en = adapter-vfinfo[vf].rss_query_enabled;
+   ivi-trusted = adapter-vfinfo[vf].trusted;
return 0;
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index 2c197e6..dad9257 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -49,6 +49,7 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, 
int min_tx_rate,
 int 

[PATCH] mac80211: fix locking in update_vlan_tailroom_need_count()

2015-06-17 Thread Johannes Berg
From: Johannes Berg johannes.b...@intel.com

Unfortunately, Michal's change to fix AP_VLAN crypto tailroom
caused a locking issue that was reported by lockdep, but only
in a few cases - the issue was a classic ABBA deadlock caused
by taking the mtx after the key_mtx, where normally they're
taken the other way around.

As the key mutex protects the field in question (I'm adding a
few annotations to make that clear) only the iteration needs
to be protected, but we can also iterate the interface list
with just RCU protection while holding the key mutex.

Fixes: f9dca80b98ca ("mac80211: fix AP_VLAN crypto tailroom calculation")
Signed-off-by: Johannes Berg johannes.b...@intel.com
---
Dave, if you're planning to send any more patches to Linus before
the release, please apply this one. Otherwise let me know and I'll
sort it out with Cc stable after the merge window.
---
 net/mac80211/key.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8abc31ebcf61..b22df3a79a41 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -66,12 +66,15 @@ update_vlan_tailroom_need_count(struct 
ieee80211_sub_if_data *sdata, int delta)
if (sdata-vif.type != NL80211_IFTYPE_AP)
return;
 
-   mutex_lock(sdata-local-mtx);
+   /* crypto_tx_tailroom_needed_cnt is protected by this */
+   assert_key_lock(sdata-local);
+
+   rcu_read_lock();
 
-   list_for_each_entry(vlan, sdata-u.ap.vlans, u.vlan.list)
+   list_for_each_entry_rcu(vlan, sdata-u.ap.vlans, u.vlan.list)
vlan-crypto_tx_tailroom_needed_cnt += delta;
 
-   mutex_unlock(sdata-local-mtx);
+   rcu_read_unlock();
 }
 
 static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
@@ -95,6 +98,8 @@ static void increment_tailroom_need_count(struct 
ieee80211_sub_if_data *sdata)
 * http://mid.gmane.org/1308590980.4322.19.ca...@jlt3.sipsolutions.net
 */
 
+   assert_key_lock(sdata-local);
+
update_vlan_tailroom_need_count(sdata, 1);
 
if (!sdata-crypto_tx_tailroom_needed_cnt++) {
@@ -109,6 +114,8 @@ static void increment_tailroom_need_count(struct 
ieee80211_sub_if_data *sdata)
 static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
 int delta)
 {
+   assert_key_lock(sdata-local);
+
WARN_ON_ONCE(sdata-crypto_tx_tailroom_needed_cnt  delta);
 
update_vlan_tailroom_need_count(sdata, -delta);
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Q] sk->sk_protinfo leftovers

2015-06-17 Thread Denis Kirjanov
Hi,

I've found the old thread about removing the sk_protinfo member [0]. Back
in 2005 Ralf mentioned that the ax25 case is more complicated. Have
things changed in the ax25 code since that time?

Thanks!

[0]: http://oss.sgi.com/archives/netdev/2005-03/msg03095.html
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] net: Cavium: Bug fix: MAC address setting in shutdown state

2015-06-17 Thread Pavel Fedin
This bug pops up with NetworkManager on Fedora 21. NetworkManager tends to
stop the interface (nicvf_stop() is called) before changing settings. In the
stopped state the MAC address cannot be sent to the PF. However, when the
interface is restarted (nicvf_open() is called), we ping the PF using the
NIC_MBOX_MSG_READY message, and the PF replies with the old MAC address,
overriding the one set from userspace. As a result, we cannot set the MAC
address using NetworkManager.

This patch introduces special tracking of a MAC change in the stopped state so
that the correct new MAC address is sent to the PF when the interface is
reopened.

Signed-off-by: Pavel Fedin p.fe...@samsung.com
---
 drivers/net/ethernet/cavium/thunder/nic.h|  1 +
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 12 ++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h
b/drivers/net/ethernet/cavium/thunder/nic.h
index a3b43e5..dda8a02 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -263,6 +263,7 @@ struct nicvf {
boolpf_acked;
boolpf_nacked;
boolbgx_stats_acked;
+   boolset_mac_pending;
 } cacheline_aligned_in_smp;
 
 /* PF -- VF Mailbox communication
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 02da802..49d7bcf 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -201,7 +201,8 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
nic-vf_id = mbx.nic_cfg.vf_id  0x7F;
nic-tns_mode = mbx.nic_cfg.tns_mode  0x7F;
nic-node = mbx.nic_cfg.node_id;
-   ether_addr_copy(nic-netdev-dev_addr, mbx.nic_cfg.mac_addr);
+   if (!nic-set_mac_pending)
+   ether_addr_copy(nic-netdev-dev_addr, 
mbx.nic_cfg.mac_addr);
nic-link_up = false;
nic-duplex = 0;
nic-speed = 0;
@@ -941,6 +942,11 @@ int nicvf_open(struct net_device *netdev)
nicvf_hw_set_mac_addr(nic, netdev);
}
 
+   if (nic-set_mac_pending) {
+   nic-set_mac_pending = false;
+   nicvf_hw_set_mac_addr(nic, netdev);
+   }
+
/* Init tasklet for handling Qset err interrupt */
tasklet_init(nic-qs_err_task, nicvf_handle_qs_err,
 (unsigned long)nic);
@@ -1040,9 +1046,11 @@ static int nicvf_set_mac_address(struct net_device 
*netdev, void
*p)
 
memcpy(netdev-dev_addr, addr-sa_data, netdev-addr_len);
 
-   if (nic-msix_enabled)
+   if (nic-msix_enabled) {
if (nicvf_hw_set_mac_addr(nic, netdev))
return -EBUSY;
+   } else
+   nic-set_mac_pending = true;
 
return 0;
 }
-- 
1.9.5.msysgit.0


--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH net] ipv4: include NLM_F_APPEND flag in append route notifications

2015-06-17 Thread Scott Feldman
On Tue, Jun 16, 2015 at 9:11 AM, Roopa Prabhu ro...@cumulusnetworks.com wrote:

[snip]

 @@ -1203,6 +1204,8 @@ int fib_table_insert(struct fib_table *tb, struct 
 fib_config *cfg)

 if (!(cfg-fc_nlflags  NLM_F_APPEND))
 fa = fa_first;
 +   else
 +   nlflags |= NLM_F_APPEND;
 }

The if and else parts above don't seem logically related.  Maybe you
could initialize nlflags as:

unsigned int nlflags = cfg->fc_nlflags & (NLM_F_REPLACE|NLM_F_APPEND);

And then pass rtmsg_fib(..., nlflags) to avoid the flag test/set?
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 0/2] sctp: add new getsockopt option SCTP_SOCKOPT_PEELOFF_KERNEL

2015-06-17 Thread Marcelo Ricardo Leitner

On 17-06-2015 07:21, Neil Horman wrote:

On Tue, Jun 16, 2015 at 07:42:31PM -0300, Marcelo Ricardo Leitner wrote:

Hi,

I'm trying to remove a direct dependency of dlm module on sctp one.
Currently dlm code is calling sctp_do_peeloff() directly and only this
call is causing the load of sctp module together with dlm. For that, we
have basically 3 options:
- Doing a module split on dlm
   - which I'm avoiding because it was already split and was merged (more
 info on patch2 changelog)
   - and the sctp code on it is rather small if compared with sctp module
 itself
- Using some other infra that gets indirectly activated, like getsockopt()
   - It was like this before, but the exposed sockopt created a file
 descriptor for the new socket and that create some serious issues.
 More info on 2f2d76cc3e93 (dlm: Do not allocate a fd for peeloff)
- Doing something like ipv6_stub (which is used by vxlan) or similar
   - but I don't feel that's a good way out here, it doesn't feel right.

So I'm approaching this by going with the 2nd option again, but this time
also creating a new sockopt that is only accessible to kernel users of
this protocol, so that we are safe to directly return a struct socket *
via getsockopt() results. This is the tricky part of this series.

It smells hacky yes but currently most of sctp calls are wrapped behind
kernel_*(). Even if we set a flag (like netlink does) saying that this
is a kernel socket, we still have the issue of getting the function call
through and returning such non-usual return value.

I kept __user marker on sctp_getsockopt_peeloff_kernel() prototype and
its helpers just to avoid issues with static checkers.

The kernel path is not really tested yet; I mainly want to know what you
think: is this feasible? A getsockopt option only reachable by the kernel
itself? I couldn't find any other like this.

Thanks,
Marcelo

Marcelo Ricardo Leitner (2):
   sctp: add new getsockopt option SCTP_SOCKOPT_PEELOFF_KERNEL
   dlm: avoid using sctp_do_peeloff directly

  fs/dlm/lowcomms.c | 17 -
  include/uapi/linux/sctp.h | 12 
  net/sctp/socket.c | 39 +++
  3 files changed, 59 insertions(+), 9 deletions(-)

--
2.4.1




Why not just use the existing PEELOFF socket option with the kernel_getsockopt
interface, and sockfd_lookup to translate the returned value back to a socket
struct?  That seems less redundant and less hack-ish to me.


It was like that before commit 2f2d76cc3e93 ("dlm: Do not allocate a fd
for peeloff"), but it caused serious issues due to the fd allocation, so
that's what I want to avoid now.


References:
http://article.gmane.org/gmane.linux.network.drbd/22529
https://bugzilla.redhat.com/show_bug.cgi?id=1075629 (this one is closed, 
sorry)


  Marcelo

--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v6 1/3] if_link: Add control trust VF

2015-06-17 Thread Jeff Kirsher
On Wed, 2015-06-17 at 11:41 +, Hiroshi Shimamoto wrote:
 From: Hiroshi Shimamoto h-shimam...@ct.jp.nec.com
 
 Add netlink directives and ndo entry to trust VF user.
 
 This controls the special permission of VF user.
 The administrator will dedicatedly trust VF user to use some features
 which impacts security and/or performance.
 
 The administrator never turn it on unless VF user is fully trusted.
 
 Signed-off-by: Hiroshi Shimamoto h-shimam...@ct.jp.nec.com
 Reviewed-by: Hayato Momma h-mo...@ce.jp.nec.com
 CC: Choi, Sy Jong sy.jong.c...@intel.com
 ---
 include/linux/if_link.h  |  1 +
  include/linux/netdevice.h|  3 +++
  include/uapi/linux/if_link.h |  6 ++
  net/core/rtnetlink.c | 19 +--
  4 files changed, 27 insertions(+), 2 deletions(-)

Thanks Hiroshi, if there are no comments for change, I will get this
series applied to my next-queue for testing.


signature.asc
Description: This is a digitally signed message part


  1   2   3   >