[PATCH net-next] virtio-net: avoid unnecessary sg initialization
Usually an skb does not have up to MAX_SKB_FRAGS frags, so there is no need to initialize the unused part of sg. This patch initializes the sg based on the real number of entries that will be used: - during xmit, it can be inferred from nr_frags and can_push. - for small receive buffer, it will also be 2. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 53f5660..c006ae4 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -540,7 +540,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, skb_put(skb, GOOD_PACKET_LEN); hdr = skb_vnet_hdr(skb); - sg_init_table(rq->sg, MAX_SKB_FRAGS + 2); + sg_init_table(rq->sg, 2); sg_set_buf(rq->sg, hdr, vi->hdr_len); skb_to_sgvec(skb, rq->sg + 1, 0, skb->len); @@ -893,7 +893,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) if (vi->mergeable_rx_bufs) hdr->num_buffers = 0; - sg_init_table(sq->sg, MAX_SKB_FRAGS + 2); + sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2)); if (can_push) { __skb_push(skb, hdr_len); num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 net-next 4/8] geneve: Make dst-port configurable.
Add netlink interface to configure Geneve UDP port number. So that user can configure it for a Gevene device. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Acked-by: John W. Linville --- drivers/net/geneve.c | 25 + include/uapi/linux/if_link.h |1 + 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 3c5b2b1..0a6d974 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -49,6 +49,7 @@ struct geneve_dev { u8 tos; /* TOS override */ struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next;/* geneve's per namespace list */ + __be16 dst_port; }; static int geneve_net_id; @@ -64,6 +65,7 @@ static inline __u32 geneve_net_vni_hash(u8 vni[3]) /* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { + struct inet_sock *sk = inet_sk(gs->sock->sk); struct genevehdr *gnvh = geneve_hdr(skb); struct geneve_dev *dummy, *geneve = NULL; struct geneve_net *gn; @@ -82,7 +84,8 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) vni_list_head = &gn->vni_list[hash]; hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && - iph->saddr == dummy->remote.sin_addr.s_addr) { + iph->saddr == dummy->remote.sin_addr.s_addr && + sk->inet_sport == dummy->dst_port) { geneve = dummy; break; } @@ -157,7 +160,7 @@ static int geneve_open(struct net_device *dev) struct geneve_net *gn = net_generic(geneve->net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn, + gs = geneve_sock_add(net, geneve->dst_port, geneve_rx, gn, false, false); if (IS_ERR(gs)) return PTR_ERR(gs); @@ -228,7 +231,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) /* no need to handle local destination and encap bypass...yet... 
*/ err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr, - tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0, + tos, ttl, 0, sport, geneve->dst_port, 0, geneve->vni, 0, NULL, false, !net_eq(geneve->net, dev_net(geneve->dev))); if (err < 0) @@ -308,6 +311,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_REMOTE]= { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, + [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -341,6 +345,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, struct hlist_head *vni_list_head; struct sockaddr_in remote; /* IPv4 address for link partner */ __u32 vni, hash; + __be16 dst_port; int err; if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) @@ -359,13 +364,20 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr))) return -EINVAL; + if (data[IFLA_GENEVE_PORT]) + dst_port = htons(nla_get_u16(data[IFLA_GENEVE_PORT])); + else + dst_port = htons(GENEVE_UDP_PORT); + remote = geneve->remote; hash = geneve_net_vni_hash(geneve->vni); vni_list_head = &gn->vni_list[hash]; hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) && - !memcmp(&remote, &dummy->remote, sizeof(dummy->remote))) + !memcmp(&remote, &dummy->remote, sizeof(dummy->remote)) && + dst_port == dummy->dst_port) { return -EBUSY; + } } err = register_netdevice(dev); @@ -378,6 +390,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (data[IFLA_GENEVE_TOS]) geneve->tos = nla_get_u8(data[IFLA_GENEVE_TOS]); + geneve->dst_port = dst_port; list_add(&geneve->next, &gn->geneve_list); hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); @@ -402,6 +415,7 @@ static size_t geneve_get_size(const struct net_device *dev) 
nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ nla_total_size(sizeof(__u8)) + /* IFLA_
[PATCH v5 net-next 3/8] tunnel: introduce udp_tun_rx_dst()
Introduce function udp_tun_rx_dst() to initialize tunnel dst on receive path. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf --- drivers/net/vxlan.c| 29 ++-- include/net/dst_metadata.h | 61 include/net/udp_tunnel.h |4 +++ net/ipv4/ip_gre.c | 21 +++--- net/ipv4/udp_tunnel.c | 25 +- 5 files changed, 97 insertions(+), 43 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 61b457b..5b4cf66 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1264,36 +1264,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } if (vxlan_collect_metadata(vs)) { - tun_dst = metadata_dst_alloc(sizeof(*md), GFP_ATOMIC); + tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, +cpu_to_be64(vni >> 8), sizeof(*md)); + if (!tun_dst) goto drop; info = &tun_dst->u.tun_info; - if (vxlan_get_sk_family(vs) == AF_INET) { - const struct iphdr *iph = ip_hdr(skb); - - info->key.u.ipv4.src = iph->saddr; - info->key.u.ipv4.dst = iph->daddr; - info->key.tos = iph->tos; - info->key.ttl = iph->ttl; - } else { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - - info->key.u.ipv6.src = ip6h->saddr; - info->key.u.ipv6.dst = ip6h->daddr; - info->key.tos = ipv6_get_dsfield(ip6h); - info->key.ttl = ip6h->hop_limit; - } - - info->key.tp_src = udp_hdr(skb)->source; - info->key.tp_dst = udp_hdr(skb)->dest; - - info->mode = IP_TUNNEL_INFO_RX; - info->key.tun_flags = TUNNEL_KEY; - info->key.tun_id = cpu_to_be64(vni >> 8); - if (udp_hdr(skb)->check != 0) - info->key.tun_flags |= TUNNEL_CSUM; - md = ip_tunnel_info_opts(info, sizeof(*md)); } else { memset(md, 0, sizeof(*md)); diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 2cb52d5..60c0332 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -48,4 +48,65 @@ static inline bool skb_valid_dst(const struct sk_buff *skb) struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); struct metadata_dst __percpu 
*metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); +static inline struct metadata_dst *tun_rx_dst(__be16 flags, + __be64 tunnel_id, int md_size) +{ + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->mode = IP_TUNNEL_INFO_RX; + info->key.tun_flags = flags; + info->key.tun_id = tunnel_id; + info->key.tp_src = 0; + info->key.tp_dst = 0; + return tun_dst; +} + +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, +__be16 flags, +__be64 tunnel_id, +int md_size) +{ + const struct iphdr *iph = ip_hdr(skb); + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.u.ipv4.src = iph->saddr; + info->key.u.ipv4.dst = iph->daddr; + info->key.tos = iph->tos; + info->key.ttl = iph->ttl; + return tun_dst; +} + +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, +__be16 flags, +__be64 tunnel_id, +int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.u.ipv6.src = ip6h->saddr; + info->key.u.ipv6.dst = ip6h->daddr; + info->key.tos = ipv6_get_dsfield(ip6h); + info->key.ttl = ip6h->hop_limit; + return tun_dst; +} + #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index c491c12..35041d0 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -93,6 +93,10 @@ int udp_tunnel6_xmit_skb
[PATCH v5 net-next 6/8] openvswitch: Use Geneve device.
With help of tunnel metadata mode OVS can directly use Geneve devices to implement Geneve tunnels. This patch removes all of the OVS specific Geneve code and make OVS use a Geneve net_device. Basic geneve vport is still there to handle compatibility with current userspace application. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross --- net/openvswitch/Kconfig|2 +- net/openvswitch/vport-geneve.c | 179 +++ 2 files changed, 33 insertions(+), 148 deletions(-) diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 422dc05..87b98c0 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -59,7 +59,7 @@ config OPENVSWITCH_VXLAN config OPENVSWITCH_GENEVE tristate "Open vSwitch Geneve tunneling support" depends on OPENVSWITCH - depends on GENEVE_CORE + depends on GENEVE default OPENVSWITCH ---help--- If you say Y here, then the Open vSwitch will be able create geneve vport. diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index d01bd63..fa37c95 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -26,95 +26,44 @@ #include "datapath.h" #include "vport.h" +#include "vport-netdev.h" static struct vport_ops ovs_geneve_vport_ops; - /** * struct geneve_port - Keeps track of open UDP ports - * @gs: The socket created for this port number. - * @name: vport name. + * @dst_port: destination port. */ struct geneve_port { - struct geneve_sock *gs; - char name[IFNAMSIZ]; + u16 port_no; }; -static LIST_HEAD(geneve_ports); - static inline struct geneve_port *geneve_vport(const struct vport *vport) { return vport_priv(vport); } -/* Convert 64 bit tunnel ID to 24 bit VNI. 
*/ -static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) -{ -#ifdef __BIG_ENDIAN - vni[0] = (__force __u8)(tun_id >> 16); - vni[1] = (__force __u8)(tun_id >> 8); - vni[2] = (__force __u8)tun_id; -#else - vni[0] = (__force __u8)((__force u64)tun_id >> 40); - vni[1] = (__force __u8)((__force u64)tun_id >> 48); - vni[2] = (__force __u8)((__force u64)tun_id >> 56); -#endif -} - -/* Convert 24 bit VNI to 64 bit tunnel ID. */ -static __be64 vni_to_tunnel_id(const __u8 *vni) -{ -#ifdef __BIG_ENDIAN - return (vni[0] << 16) | (vni[1] << 8) | vni[2]; -#else - return (__force __be64)(((__force u64)vni[0] << 40) | - ((__force u64)vni[1] << 48) | - ((__force u64)vni[2] << 56)); -#endif -} - -static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) -{ - struct vport *vport = gs->rcv_data; - struct genevehdr *geneveh = geneve_hdr(skb); - int opts_len; - struct ip_tunnel_info tun_info; - __be64 key; - __be16 flags; - - opts_len = geneveh->opt_len * 4; - - flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | - (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | - (geneveh->oam ? TUNNEL_OAM : 0) | - (geneveh->critical ? 
TUNNEL_CRIT_OPT : 0); - - key = vni_to_tunnel_id(geneveh->vni); - - ip_tunnel_info_init(&tun_info, ip_hdr(skb), - udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, flags, geneveh->options, opts_len); - - ovs_vport_receive(vport, skb, &tun_info); -} - static int geneve_get_options(const struct vport *vport, struct sk_buff *skb) { struct geneve_port *geneve_port = geneve_vport(vport); - struct inet_sock *sk = inet_sk(geneve_port->gs->sock->sk); - if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport))) + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no)) return -EMSGSIZE; return 0; } -static void geneve_tnl_destroy(struct vport *vport) +static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ip_tunnel_info *egress_tun_info) { struct geneve_port *geneve_port = geneve_vport(vport); + struct net *net = ovs_dp_get_net(vport->dp); + __be16 dport = htons(geneve_port->port_no); + __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - geneve_sock_release(geneve_port->gs); - - ovs_vport_deferred_free(vport); + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, sport, dport); } static struct vport *geneve_tnl_create(const struct vport_parms *parms) @@ -122,11 +71,11 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) struct net *net = ovs_dp_get_net(parms->dp); struct nlattr *options
[PATCH v5 net-next 7/8] geneve: Consolidate Geneve functionality in single module.
geneve_core module handles send and receive functionality. This way OVS could use the Geneve API. Now with use of tunnel meatadata mode OVS can directly use Geneve netdevice. So there is no need for separate module for Geneve. Following patch consolidates Geneve protocol processing in single module. Signed-off-by: Pravin B Shelar --- v4-v5: - Fix xnet for collect-md tunnels v3-v4: - Fixed newlink() validation. v2-v3: - Fixed Kconfig dependency. - unified geneve_build_skb() - Fixed geneve_build_skb() error path. --- drivers/net/Kconfig|4 +- drivers/net/geneve.c | 507 +++- include/net/geneve.h | 34 net/ipv4/Kconfig | 14 -- net/ipv4/Makefile |1 - net/ipv4/geneve_core.c | 447 -- 6 files changed, 421 insertions(+), 586 deletions(-) delete mode 100644 net/ipv4/geneve_core.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 770483b..d18eb60 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -180,8 +180,8 @@ config VXLAN will be called vxlan. config GENEVE - tristate "Generic Network Virtualization Encapsulation netdev" - depends on INET && GENEVE_CORE + tristate "Generic Network Virtualization Encapsulation" + depends on INET && NET_UDP_TUNNEL select NET_IP_TUNNEL ---help--- This allows one to create geneve virtual interfaces that provide diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d05150c..90d4d43 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -18,6 +18,7 @@ #include #include #include +#include #define GENEVE_NETDEV_VER "0.6" @@ -33,13 +34,18 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); +#define GENEVE_VER 0 +#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) + /* per-network namespace private data for this module */ struct geneve_net { - struct list_head geneve_list; - struct hlist_head vni_list[VNI_HASH_SIZE]; - struct geneve_dev __rcu *collect_md_tun; + struct 
list_headgeneve_list; + struct hlist_head vni_list[VNI_HASH_SIZE]; + struct list_headsock_list; }; +static int geneve_net_id; + /* Pseudo network device */ struct geneve_dev { struct hlist_node hlist; /* vni hash table */ @@ -55,7 +61,15 @@ struct geneve_dev { bool collect_md; }; -static int geneve_net_id; +struct geneve_sock { + boolcollect_md; + struct geneve_net *gn; + struct list_headlist; + struct socket *sock; + struct rcu_head rcu; + int refcnt; + struct udp_offload udp_offloads; +}; static inline __u32 geneve_net_vni_hash(u8 vni[3]) { @@ -76,51 +90,62 @@ static __be64 vni_to_tunnel_id(const __u8 *vni) #endif } -static struct geneve_dev *geneve_lookup(struct geneve_net *gn, - struct geneve_sock *gs, - struct iphdr *iph, - struct genevehdr *gnvh) +static struct geneve_dev *geneve_lookup(struct geneve_net *gn, __be16 port, + __be32 addr, u8 vni[]) { - struct inet_sock *sk = inet_sk(gs->sock->sk); struct hlist_head *vni_list_head; struct geneve_dev *geneve; __u32 hash; - geneve = rcu_dereference(gn->collect_md_tun); - if (geneve) - return geneve; - /* Find the device for this VNI */ - hash = geneve_net_vni_hash(gnvh->vni); + hash = geneve_net_vni_hash(vni); vni_list_head = &gn->vni_list[hash]; hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { - if (!memcmp(gnvh->vni, geneve->vni, sizeof(geneve->vni)) && - iph->saddr == geneve->remote.sin_addr.s_addr && - sk->inet_sport == geneve->dst_port) { + if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && + addr == geneve->remote.sin_addr.s_addr && + port == geneve->dst_port) { return geneve; } } return NULL; } +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + /* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { + struct inet_sock *sk = inet_sk(gs->sock->sk); struct genevehdr *gnvh = geneve_hdr(skb); + struct geneve_net *gn = gs->gn; struct metadata_dst *tun_dst = NULL; struct 
geneve_dev *geneve = NULL; struct pcpu_sw_netstats *stats; - struct geneve_net *gn; struct iphdr *iph; +
[PATCH v5 net-next 5/8] geneve: Add support to collect tunnel metadata.
Following patch create new tunnel flag which enable tunnel metadata collection on given device. These devices can be used by tunnel metadata based routing or by OVS. Geneve Consolidation patch get rid of collect_md_tun to simplify tunnel lookup further. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross --- v3-v4: - Do not set NETIF_F_NETNS_LOCAL v2-v3: - Do not allow regular and metadata tunnel devices on same port. --- drivers/net/geneve.c | 356 -- include/net/geneve.h |3 + include/uapi/linux/if_link.h |1 + 3 files changed, 275 insertions(+), 85 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0a6d974..d05150c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); struct geneve_net { struct list_head geneve_list; struct hlist_head vni_list[VNI_HASH_SIZE]; + struct geneve_dev __rcu *collect_md_tun; }; /* Pseudo network device */ @@ -50,6 +52,7 @@ struct geneve_dev { struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next;/* geneve's per namespace list */ __be16 dst_port; + bool collect_md; }; static int geneve_net_id; @@ -62,48 +65,95 @@ static inline __u32 geneve_net_vni_hash(u8 vni[3]) return hash_32(vnid, VNI_HASH_BITS); } -/* geneve receive/decap routine */ -static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) +static __be64 vni_to_tunnel_id(const __u8 *vni) +{ +#ifdef __BIG_ENDIAN + return (vni[0] << 16) | (vni[1] << 8) | vni[2]; +#else + return (__force __be64)(((__force u64)vni[0] << 40) | + ((__force u64)vni[1] << 48) | + ((__force u64)vni[2] << 56)); +#endif +} + +static struct geneve_dev *geneve_lookup(struct geneve_net *gn, + struct geneve_sock *gs, + struct iphdr *iph, + struct genevehdr *gnvh) { struct inet_sock *sk = inet_sk(gs->sock->sk); - struct genevehdr *gnvh = geneve_hdr(skb); - struct geneve_dev 
*dummy, *geneve = NULL; - struct geneve_net *gn; - struct iphdr *iph = NULL; - struct pcpu_sw_netstats *stats; struct hlist_head *vni_list_head; - int err = 0; + struct geneve_dev *geneve; __u32 hash; - iph = ip_hdr(skb); /* Still outer IP header... */ - - gn = gs->rcv_data; + geneve = rcu_dereference(gn->collect_md_tun); + if (geneve) + return geneve; /* Find the device for this VNI */ hash = geneve_net_vni_hash(gnvh->vni); vni_list_head = &gn->vni_list[hash]; - hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { - if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && - iph->saddr == dummy->remote.sin_addr.s_addr && - sk->inet_sport == dummy->dst_port) { - geneve = dummy; - break; + hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { + if (!memcmp(gnvh->vni, geneve->vni, sizeof(geneve->vni)) && + iph->saddr == geneve->remote.sin_addr.s_addr && + sk->inet_sport == geneve->dst_port) { + return geneve; } } + return NULL; +} + +/* geneve receive/decap routine */ +static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) +{ + struct genevehdr *gnvh = geneve_hdr(skb); + struct metadata_dst *tun_dst = NULL; + struct geneve_dev *geneve = NULL; + struct pcpu_sw_netstats *stats; + struct geneve_net *gn; + struct iphdr *iph; + int err; + + iph = ip_hdr(skb); /* Still outer IP header... */ + gn = gs->rcv_data; + geneve = geneve_lookup(gn, gs, iph, gnvh); if (!geneve) goto drop; - /* Drop packets w/ critical options, -* since we don't support any... -*/ - if (gnvh->critical) - goto drop; + if (ip_tunnel_collect_metadata() || geneve->collect_md) { + __be16 flags; + void *opts; + + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | + (gnvh->oam ? TUNNEL_OAM : 0) | + (gnvh->critical ? TUNNEL_CRIT_OPT : 0); + + tun_dst = udp_tun_rx_dst(skb, AF_INET, flags, +vni_to_tunnel_id(gnvh->vni), +gnvh->opt_len * 4); + if (!tun_dst) + goto drop; + + /* Update tunnel dst according to Geneve opti
[PATCH v5 net-next 2/8] geneve: Use skb mark and protocol to lookup route.
On the packet transmit path geneve needs to look up a route. The following patch improves the route lookup by using more parameters. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Acked-by: John W. Linville --- drivers/net/geneve.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 95e9da0..3c5b2b1 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -202,6 +202,9 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_tos = RT_TOS(tos); fl4.daddr = geneve->remote.sin_addr.s_addr; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(geneve->net, &fl4); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 net-next 8/8] geneve: Move device hash table to geneve socket.
This change simplifies Geneve Tunnel hash table management. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Reviewed-by: John W. Linville --- drivers/net/geneve.c | 43 +-- 1 files changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 90d4d43..4357bae 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -40,7 +40,6 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); /* per-network namespace private data for this module */ struct geneve_net { struct list_headgeneve_list; - struct hlist_head vni_list[VNI_HASH_SIZE]; struct list_headsock_list; }; @@ -63,12 +62,12 @@ struct geneve_dev { struct geneve_sock { boolcollect_md; - struct geneve_net *gn; struct list_headlist; struct socket *sock; struct rcu_head rcu; int refcnt; struct udp_offload udp_offloads; + struct hlist_head vni_list[VNI_HASH_SIZE]; }; static inline __u32 geneve_net_vni_hash(u8 vni[3]) @@ -90,7 +89,7 @@ static __be64 vni_to_tunnel_id(const __u8 *vni) #endif } -static struct geneve_dev *geneve_lookup(struct geneve_net *gn, __be16 port, +static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { struct hlist_head *vni_list_head; @@ -99,13 +98,11 @@ static struct geneve_dev *geneve_lookup(struct geneve_net *gn, __be16 port, /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); - vni_list_head = &gn->vni_list[hash]; + vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && - addr == geneve->remote.sin_addr.s_addr && - port == geneve->dst_port) { + addr == geneve->remote.sin_addr.s_addr) return geneve; - } } return NULL; } @@ -118,9 +115,7 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) /* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { - struct inet_sock *sk = inet_sk(gs->sock->sk); struct genevehdr 
*gnvh = geneve_hdr(skb); - struct geneve_net *gn = gs->gn; struct metadata_dst *tun_dst = NULL; struct geneve_dev *geneve = NULL; struct pcpu_sw_netstats *stats; @@ -129,8 +124,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) __be32 addr; int err; - iph = ip_hdr(skb); /* Still outer IP header... */ - if (gs->collect_md) { static u8 zero_vni[3]; @@ -138,10 +131,11 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) addr = 0; } else { vni = gnvh->vni; + iph = ip_hdr(skb); /* Still outer IP header... */ addr = iph->saddr; } - geneve = geneve_lookup(gn, sk->inet_sport, addr, vni); + geneve = geneve_lookup(gs, addr, vni); if (!geneve) goto drop; @@ -410,6 +404,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; + int h; gs = kzalloc(sizeof(*gs), GFP_KERNEL); if (!gs) @@ -423,7 +418,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, gs->sock = sock; gs->refcnt = 1; - gs->gn = gn; + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&gs->vni_list[h]); /* Initialize the geneve udp offloads structure */ gs->udp_offloads.port = port; @@ -437,7 +433,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_rcv = geneve_udp_encap_recv; tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - list_add(&gs->list, &gn->sock_list); return gs; } @@ -482,6 +477,7 @@ static int geneve_open(struct net_device *dev) struct net *net = geneve->net; struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; + __u32 hash; gs = geneve_find_sock(gn, geneve->dst_port); if (gs) { @@ -496,14 +492,20 @@ static int geneve_open(struct net_device *dev) out: gs->collect_md = geneve->collect_md; geneve->sock = gs; + + hash = geneve_net_vni_hash(geneve->vni); + hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); return 0; 
} static int geneve_stop(struct net_device *dev) { struct geneve_de
[PATCH v5 net-next 1/8] geneve: Initialize ethernet address in device setup.
Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Acked-by: John W. Linville --- drivers/net/geneve.c |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 897e1a3..95e9da0 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -297,6 +297,7 @@ static void geneve_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + eth_hw_addr_random(dev); } static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { @@ -364,9 +365,6 @@ static int geneve_newlink(struct net *net, struct net_device *dev, return -EBUSY; } - if (tb[IFLA_ADDRESS] == NULL) - eth_hw_addr_random(dev); - err = register_netdevice(dev); if (err) return err; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 net-next 0/8] geneve: Add support for tunnel metadata mode
The following patches add support for Geneve tunnel metadata mode. OVS can make use of the Geneve net-device with the tunnel metadata API from the kernel. This also allows us to consolidate the Geneve implementation from two kernel modules, geneve_core and geneve, into a single geneve module. The geneve_core module was intended to share Geneve encap and decap code between the Geneve netdevice and the OVS Geneve tunnel implementation. Since OVS no longer needs this API, the Geneve code can be consolidated into a single geneve module. v4-v5: - Fix xnet calculation. v3-v4: - Drop NETIF_F_NETNS_LOCAL feature. - Fix geneve device newlink check v2-v3: - make tunnel metadata device and regular device mutually exclusive. - Fix Kconfig dependency for Geneve. - Fix dst-port netlink encoding. - drop changelink patch. v1-v2: - Replaced per hash table tunnel pointer (metadata enabled) with flag. - Added support for changelink. - Improve geneve device route lookup with more parameters. Pravin B Shelar (8): geneve: Initialize ethernet address in device setup. geneve: Use skb mark and protocol to lookup route. tunnel: introduce udp_tun_rx_dst() geneve: Make dst-port configurable. geneve: Add support to collect tunnel metadata. openvswitch: Use Geneve device. geneve: Consolidate Geneve functionality in single module. geneve: Move device hash table to geneve socket. 
drivers/net/Kconfig|4 +- drivers/net/geneve.c | 736 ++-- drivers/net/vxlan.c| 29 +-- include/net/dst_metadata.h | 61 include/net/geneve.h | 35 +-- include/net/udp_tunnel.h |4 + include/uapi/linux/if_link.h |2 + net/ipv4/Kconfig | 14 - net/ipv4/Makefile |1 - net/ipv4/geneve_core.c | 447 net/ipv4/ip_gre.c | 21 +- net/ipv4/udp_tunnel.c | 25 ++- net/openvswitch/Kconfig|2 +- net/openvswitch/vport-geneve.c | 179 ++ 14 files changed, 767 insertions(+), 793 deletions(-) delete mode 100644 net/ipv4/geneve_core.c -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 2/3] mlxsw: expose EMAD transactions statistics via debugfs
Thu, Aug 27, 2015 at 08:36:03AM CEST, da...@davemloft.net wrote: >From: Jiri Pirko >Date: Thu, 27 Aug 2015 08:27:04 +0200 > >> I'm not saying it is not possible, it certainly is. But I think that >> for example rocker internals have no value to default user, he >> should not care and he cannot find out what is going on there >> without knowledge of rocker.c code. The question is, do we need some >> standard interface to expose random debugging data? I don't think >> so, I think that debugfs is exactly the tool to be used in that >> case. > >If it is only interesting to rocker.c maintainer, he can keep a local >patch he applies when he needs such a facility. > >This discussion is becoming circular. > >If it's useful, it needs a well defined interface. > >If it's not useful, it doesn't belong in the tree. > >Therefore, debugfs is useless. Fair enough. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 2/3] mlxsw: expose EMAD transactions statistics via debugfs
From: Jiri Pirko Date: Thu, 27 Aug 2015 08:27:04 +0200 > I'm not saying it is not possible, it certainly is. But I think that > for example rocker internals have no value to default user, he > should not care and he cannot find out what is going on there > without knowledge of rocker.c code. The question is, do we need some > standard interface to expose random debugging data? I don't think > so, I think that debugfs is exactly the tool to be used in that > case. If it is only interesting to rocker.c maintainer, he can keep a local patch he applies when he needs such a facility. This discussion is becoming circular. If it's useful, it needs a well defined interface. If it's not useful, it doesn't belong in the tree. Therefore, debugfs is useless. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 net-next] bridge: Add netlink support for vlan_protocol attribute
This enables bridge vlan_protocol to be configured through netlink. When CONFIG_BRIDGE_VLAN_FILTERING is disabled, kernel behaves the same way as this feature is not implemented. Signed-off-by: Toshiaki Makita --- v2: Fix u16 to __be16 include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 34 ++ net/bridge/br_private.h | 1 + net/bridge/br_vlan.c | 35 +-- 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 313c305..2d13dd4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -231,6 +231,7 @@ enum { IFLA_BR_STP_STATE, IFLA_BR_PRIORITY, IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, __IFLA_BR_MAX, }; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index dbcb194..9cea3cf 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -673,6 +673,21 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return -EADDRNOTAVAIL; } + if (!data) + return 0; + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + switch (nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } +#endif + return 0; } @@ -729,6 +744,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 }, + [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -784,6 +800,16 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + __be16 vlan_proto = nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL]); + + err = __br_vlan_set_proto(br, vlan_proto); + if (err) + return err; + } +#endif + return 0; } @@ -796,6 +822,9 @@ static size_t 
br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u32)) +/* IFLA_BR_STP_STATE */ nla_total_size(sizeof(u16)) +/* IFLA_BR_PRIORITY */ nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ +#endif 0; } @@ -819,6 +848,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled)) return -EMSGSIZE; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto)) + return -EMSGSIZE; +#endif + return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3d95647..19e8f79 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -616,6 +616,7 @@ bool br_vlan_find(struct net_bridge *br, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3cef689..3cd8cc9 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -492,23 +492,16 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) return 0; } -int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; struct net_bridge_port *p; struct net_port_vlans *pv; - __be16 proto, oldproto; + __be16 oldproto; u16 vid, errvid; - if (val != ETH_P_8021Q && val != ETH_P_8021AD) - return -EPROTONOSUPPORT; - - if (!rtnl_trylock()) - return restart_syscall(); - - proto = htons(val); if (br->vlan_proto == proto) - goto unlock; + return 
0; /* Add VLANs for the new proto to the device filter. */ list_for_each_entry(p, &br->port_list, list) { @@ -539,9 +532,7 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
Re: [patch net-next 3/6] net: add netif_is_ovs_master helper with IFF_OPENVSWITCH private flag
Thu, Aug 27, 2015 at 08:23:13AM CEST, sfel...@gmail.com wrote: >On Wed, Aug 26, 2015 at 10:43 PM, Jiri Pirko wrote: >> Wed, Aug 26, 2015 at 07:43:18PM CEST, sfel...@gmail.com wrote: >>>On Wed, Aug 26, 2015 at 9:36 AM, Jiri Pirko wrote: From: Jiri Pirko Add this helper so code can easily figure out if netdev is openswitch. Signed-off-by: Jiri Pirko --- include/linux/netdevice.h| 8 net/openvswitch/vport-internal_dev.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be625f4..0a884e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1264,6 +1264,7 @@ struct net_device_ops { * @IFF_MACVLAN: Macvlan device * @IFF_VRF_MASTER: device is a VRF master * @IFF_NO_QUEUE: device can run without qdisc attached + * @IFF_VRF_OPENVSWITCH: device is a Open vSwitch master */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1293,6 +1294,7 @@ enum netdev_priv_flags { IFF_IPVLAN_SLAVE= 1<<24, IFF_VRF_MASTER = 1<<25, IFF_NO_QUEUE= 1<<26, + IFF_OPENVSWITCH = 1<<27, }; #define IFF_802_1Q_VLANIFF_802_1Q_VLAN @@ -1322,6 +1324,7 @@ enum netdev_priv_flags { #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE #define IFF_VRF_MASTER IFF_VRF_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE +#define IFF_OPENVSWITCHIFF_OPENVSWITCH /** * struct net_device - The DEVICE structure. @@ -3853,6 +3856,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev) return dev->priv_flags & IFF_EBRIDGE; } +static inline bool netif_is_ovs_master(const struct net_device *dev) +{ + return dev->priv_flags & IFF_OPENVSWITCH; +} >>> >>>We're going to run out of priv_flags bits. This flag doesn't seem >>>like something that will be checked lots of places. How about using >>>rtnl_link_ops->kind to save a bit in priv_flags? 
>>> >>>static inline bool netif_is_ovs_master(const struct net_device *dev) >>>{ >>>return !strcmp(dev->rtnl_link_ops->kind, "openvswitch")); >>>} >> >> There are lot of helpers like this for other soft-devices. I think that >> is okay to have it this way. The thing is that sometimes you need to use >> thi helper in fast path and in that case, you do not want to strcmp. >> >> There is plenty of priv_flags bits for now when I killed the bonding >> stuff. > >Ya, but think about the bit: you (and others) used a bit in priv_flags >to indicate the netdev type. Can you add an enum field to >rtnl_link_ops->type to indicate link type? Then it's not a strcmp. >You can write your helper using strcmp first, and then later migrate >to using rtnl_link_ops->type. But how different this would be to the priv_flags? You have to have "central storage" for these types anyway, same as for flags. + it's one more pointer dereference and null check on fastpath. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 3/6] net: add netif_is_ovs_master helper with IFF_OPENVSWITCH private flag
Thu, Aug 27, 2015 at 08:23:13AM CEST, sfel...@gmail.com wrote: >On Wed, Aug 26, 2015 at 10:43 PM, Jiri Pirko wrote: >> Wed, Aug 26, 2015 at 07:43:18PM CEST, sfel...@gmail.com wrote: >>>On Wed, Aug 26, 2015 at 9:36 AM, Jiri Pirko wrote: From: Jiri Pirko Add this helper so code can easily figure out if netdev is openswitch. Signed-off-by: Jiri Pirko --- include/linux/netdevice.h| 8 net/openvswitch/vport-internal_dev.c | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be625f4..0a884e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1264,6 +1264,7 @@ struct net_device_ops { * @IFF_MACVLAN: Macvlan device * @IFF_VRF_MASTER: device is a VRF master * @IFF_NO_QUEUE: device can run without qdisc attached + * @IFF_VRF_OPENVSWITCH: device is a Open vSwitch master */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1293,6 +1294,7 @@ enum netdev_priv_flags { IFF_IPVLAN_SLAVE= 1<<24, IFF_VRF_MASTER = 1<<25, IFF_NO_QUEUE= 1<<26, + IFF_OPENVSWITCH = 1<<27, }; #define IFF_802_1Q_VLANIFF_802_1Q_VLAN @@ -1322,6 +1324,7 @@ enum netdev_priv_flags { #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE #define IFF_VRF_MASTER IFF_VRF_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE +#define IFF_OPENVSWITCHIFF_OPENVSWITCH /** * struct net_device - The DEVICE structure. @@ -3853,6 +3856,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev) return dev->priv_flags & IFF_EBRIDGE; } +static inline bool netif_is_ovs_master(const struct net_device *dev) +{ + return dev->priv_flags & IFF_OPENVSWITCH; +} >>> >>>We're going to run out of priv_flags bits. This flag doesn't seem >>>like something that will be checked lots of places. How about using >>>rtnl_link_ops->kind to save a bit in priv_flags? 
>>> >>>static inline bool netif_is_ovs_master(const struct net_device *dev) >>>{ >>>return !strcmp(dev->rtnl_link_ops->kind, "openvswitch")); >>>} >> >> There are lot of helpers like this for other soft-devices. I think that >> is okay to have it this way. The thing is that sometimes you need to use >> thi helper in fast path and in that case, you do not want to strcmp. >> >> There is plenty of priv_flags bits for now when I killed the bonding >> stuff. > >Ya, but think about the bit: you (and others) used a bit in priv_flags >to indicate the netdev type. Can you add an enum field to >rtnl_link_ops->type to indicate link type? Then it's not a strcmp. >You can write your helper using strcmp first, and then later migrate >to using rtnl_link_ops->type. Also, dev can be multiple things, it can be bridge port and vlan dev at the same time. Flags are good for this. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] bpf: add support for %s specifier to bpf_trace_printk()
%s specifier makes bpf program and kernel debugging easier. To make sure that trace_printk won't crash the unsafe string is copied into stack and unsafe pointer is substituted. String is also sanitized for printable characters. The cost of swapping FS in probe_kernel_read is amortized over 4 chars to improve performance. Suggested-by: Brendan Gregg Signed-off-by: Alexei Starovoitov --- The following C program: #include int foo(struct pt_regs *ctx, struct filename *filename) { void *name = 0; bpf_probe_read(&name, sizeof(name), &filename->name); bpf_trace_printk("executed %s\\n", name); return 0; } when attached to kprobe do_execve() will produce output in /sys/kernel/debug/tracing/trace_pipe : make-13492 [002] d..1 3250.997277: : executed /bin/sh sh-13493 [004] d..1 3250.998716: : executed /usr/bin/gcc gcc-13494 [002] d..1 3250.999822: : executed /usr/lib/gcc/x86_64-linux-gnu/4.7/cc1 gcc-13495 [002] d..1 3251.006731: : executed /usr/bin/as gcc-13496 [002] d..1 3251.011831: : executed /usr/lib/gcc/x86_64-linux-gnu/4.7/collect2 collect2-13497 [000] d..1 3251.012941: : executed /usr/bin/ld kernel/trace/bpf_trace.c | 60 +++--- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ef9936df1b04..60d8f95258ed 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -79,13 +79,51 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; +static bool is_valid_char(char c) +{ + return (isprint(c) || isspace(c)) && isascii(c); +} + +/* similar to strncpy_from_user() but with extra checks */ +static void probe_read_string(char *buf, int size, long unsafe_ptr) +{ + char dst[4]; + int i = 0; + + size--; + for (;;) { + if (probe_kernel_read(dst, (void *) unsafe_ptr, 4)) + break; + + unsafe_ptr += 4; + + if (dst[0] == 0 || !is_valid_char(dst[0]) || i >= size) + break; + buf[i++] = dst[0]; + + if (dst[1] == 0 || !is_valid_char(dst[1]) || i >= size) + break; + buf[i++] 
= dst[1]; + + if (dst[2] == 0 || !is_valid_char(dst[2]) || i >= size) + break; + buf[i++] = dst[2]; + + if (dst[3] == 0 || !is_valid_char(dst[3]) || i >= size) + break; + buf[i++] = dst[3]; + } + buf[i] = 0; +} + /* * limited trace_printk() - * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed + * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed */ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) { char *fmt = (char *) (long) r1; + char buf[64]; int mod[3] = {}; int fmt_cnt = 0; int i; @@ -100,7 +138,7 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) /* check format string for allowed specifiers */ for (i = 0; i < fmt_size; i++) { - if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) + if (!is_valid_char(fmt[i])) return -EINVAL; if (fmt[i] != '%') @@ -114,12 +152,28 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) if (fmt[i] == 'l') { mod[fmt_cnt]++; i++; - } else if (fmt[i] == 'p') { + } else if (fmt[i] == 'p' || fmt[i] == 's') { mod[fmt_cnt]++; i++; if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) return -EINVAL; fmt_cnt++; + if (fmt[i - 1] == 's') { + switch (fmt_cnt) { + case 1: + probe_read_string(buf, sizeof(buf), r3); + r3 = (long) buf; + break; + case 2: + probe_read_string(buf, sizeof(buf), r4); + r4 = (long) buf; + break; + case 3: + probe_read_string(buf, sizeof(buf), r5); + r5 = (long) buf; + break; + } + } continue; } -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at
Re: [patch net-next 2/3] mlxsw: expose EMAD transactions statistics via debugfs
Thu, Aug 27, 2015 at 08:01:15AM CEST, da...@davemloft.net wrote: >From: Jiri Pirko >Date: Thu, 27 Aug 2015 07:40:04 +0200 > >> Switch object itselt would not help you to expose rocker internals. I >> don't think that you can find generic way, same for all drivers, to >> expose internal tables and stuff. That is hw specific. > >Tables are datastructures with names and types. > >Is it not possible to describe datastructures and their types with >user visible interfaces? I'm not saying it is not possible, it certainly is. But I think that for example rocker internals have no value to default user, he should not care and he cannot find out what is going on there without knowledge or rocker.c code. The question is, do we need some standard interface to expose random debugging data? I don't think so, I think that debugfs is exactly the tool to be used in that case. > >Anyone against what I am saying right now is simply lazy. Not lazy, just thinking :) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 3/6] net: add netif_is_ovs_master helper with IFF_OPENVSWITCH private flag
On Wed, Aug 26, 2015 at 10:43 PM, Jiri Pirko wrote: > Wed, Aug 26, 2015 at 07:43:18PM CEST, sfel...@gmail.com wrote: >>On Wed, Aug 26, 2015 at 9:36 AM, Jiri Pirko wrote: >>> From: Jiri Pirko >>> >>> Add this helper so code can easily figure out if netdev is openswitch. >>> >>> Signed-off-by: Jiri Pirko >>> --- >>> include/linux/netdevice.h| 8 >>> net/openvswitch/vport-internal_dev.c | 2 +- >>> 2 files changed, 9 insertions(+), 1 deletion(-) >>> >>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >>> index be625f4..0a884e6 100644 >>> --- a/include/linux/netdevice.h >>> +++ b/include/linux/netdevice.h >>> @@ -1264,6 +1264,7 @@ struct net_device_ops { >>> * @IFF_MACVLAN: Macvlan device >>> * @IFF_VRF_MASTER: device is a VRF master >>> * @IFF_NO_QUEUE: device can run without qdisc attached >>> + * @IFF_VRF_OPENVSWITCH: device is a Open vSwitch master >>> */ >>> enum netdev_priv_flags { >>> IFF_802_1Q_VLAN = 1<<0, >>> @@ -1293,6 +1294,7 @@ enum netdev_priv_flags { >>> IFF_IPVLAN_SLAVE= 1<<24, >>> IFF_VRF_MASTER = 1<<25, >>> IFF_NO_QUEUE= 1<<26, >>> + IFF_OPENVSWITCH = 1<<27, >>> }; >>> >>> #define IFF_802_1Q_VLANIFF_802_1Q_VLAN >>> @@ -1322,6 +1324,7 @@ enum netdev_priv_flags { >>> #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE >>> #define IFF_VRF_MASTER IFF_VRF_MASTER >>> #define IFF_NO_QUEUE IFF_NO_QUEUE >>> +#define IFF_OPENVSWITCHIFF_OPENVSWITCH >>> >>> /** >>> * struct net_device - The DEVICE structure. >>> @@ -3853,6 +3856,11 @@ static inline bool netif_is_bridge_master(const >>> struct net_device *dev) >>> return dev->priv_flags & IFF_EBRIDGE; >>> } >>> >>> +static inline bool netif_is_ovs_master(const struct net_device *dev) >>> +{ >>> + return dev->priv_flags & IFF_OPENVSWITCH; >>> +} >> >>We're going to run out of priv_flags bits. This flag doesn't seem >>like something that will be checked lots of places. How about using >>rtnl_link_ops->kind to save a bit in priv_flags? 
>> >>static inline bool netif_is_ovs_master(const struct net_device *dev) >>{ >>return !strcmp(dev->rtnl_link_ops->kind, "openvswitch")); >>} > > There are lot of helpers like this for other soft-devices. I think that > is okay to have it this way. The thing is that sometimes you need to use > thi helper in fast path and in that case, you do not want to strcmp. > > There is plenty of priv_flags bits for now when I killed the bonding > stuff. Ya, but think about the bit: you (and others) used a bit in priv_flags to indicate the netdev type. Can you add an enum field to rtnl_link_ops->type to indicate link type? Then it's not a strcmp. You can write your helper using strcmp first, and then later migrate to using rtnl_link_ops->type. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 2/3] mlxsw: expose EMAD transactions statistics via debugfs
From: Jiri Pirko Date: Thu, 27 Aug 2015 07:40:04 +0200 > Switch object itself would not help you to expose rocker internals. I > don't think that you can find a generic way, same for all drivers, to > expose internal tables and stuff. That is hw specific. Tables are datastructures with names and types. Is it not possible to describe datastructures and their types with user visible interfaces? Anyone against what I am saying right now is simply lazy. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] bridge: Add netlink support for vlan_protocol attribute
This enables bridge vlan_protocol to be configured through netlink. When CONFIG_BRIDGE_VLAN_FILTERING is disabled, kernel behaves the same way as this feature is not implemented. Signed-off-by: Toshiaki Makita --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 34 ++ net/bridge/br_private.h | 1 + net/bridge/br_vlan.c | 35 +-- 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 313c305..2d13dd4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -231,6 +231,7 @@ enum { IFLA_BR_STP_STATE, IFLA_BR_PRIORITY, IFLA_BR_VLAN_FILTERING, + IFLA_BR_VLAN_PROTOCOL, __IFLA_BR_MAX, }; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index dbcb194..9cea3cf 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -673,6 +673,21 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return -EADDRNOTAVAIL; } + if (!data) + return 0; + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + switch (nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + break; + default: + return -EPROTONOSUPPORT; + } + } +#endif + return 0; } @@ -729,6 +744,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_STP_STATE] = { .type = NLA_U32 }, [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 }, + [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -784,6 +800,16 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (data[IFLA_BR_VLAN_PROTOCOL]) { + u16 vlan_proto = nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL]); + + err = __br_vlan_set_proto(br, vlan_proto); + if (err) + return err; + } +#endif + return 0; } @@ -796,6 +822,9 @@ static size_t br_get_size(const struct net_device 
*brdev) nla_total_size(sizeof(u32)) +/* IFLA_BR_STP_STATE */ nla_total_size(sizeof(u16)) +/* IFLA_BR_PRIORITY */ nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ +#endif 0; } @@ -819,6 +848,11 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled)) return -EMSGSIZE; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto)) + return -EMSGSIZE; +#endif + return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 3d95647..19e8f79 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -616,6 +616,7 @@ bool br_vlan_find(struct net_bridge *br, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3cef689..3cd8cc9 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -492,23 +492,16 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) return 0; } -int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; struct net_bridge_port *p; struct net_port_vlans *pv; - __be16 proto, oldproto; + __be16 oldproto; u16 vid, errvid; - if (val != ETH_P_8021Q && val != ETH_P_8021AD) - return -EPROTONOSUPPORT; - - if (!rtnl_trylock()) - return restart_syscall(); - - proto = htons(val); if (br->vlan_proto == proto) - goto unlock; + return 0; /* Add VLANs for the new proto to 
the device filter. */ list_for_each_entry(p, &br->port_list, list) { @@ -539,9 +532,7 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val) vlan_vid_del(p->dev, old
Re: tcp: add NV congestion control
The updated NV document with the new experiments and a table with all the experimental results are now available at (http://www.brakmo.org/networking/tcp-nv/TCPNV.html). - Lawrence On 8/25/15, 4:33 PM, "Lawrence Brakmo" wrote: >Changes from v5: cleaning of NV code, changing some default parameters > >I've run more extensive tests, I'm working on updating the NV website >(http://www.brakmo.org/networking/tcp-nv/TCPNV.html) should be updated >by tomorrow (8/26). > >The updated tests include Reno, Cubic, NV and CDG and include more types >of traffic. Overview of results: >1) NV has a little lower throughput (2-3% less) with small number of flows > as compared to Reno, Cubic and CDG >2) NV is less fair with few flows but becomes more fair with more flows >3) Less losses with NV (none in many cases) as compared to all others. > One exception is when things get very congested (64 flows into one > server), NV has 50% more losses than CDG, Cubic has 1.8x to 10x more > losses than CDG. Reno has about the same losses as CDG. >4) In mixed traffic (1M and 10K RPCs), 10K flows achieve much higher > average throughput with NV than with the others (which are > very similar). In one example, 2 clients sending 1M and 10K to 2 > servers, with NV 10K flows average 1Gbps and 1M flows 3.7Gbps, > whereas they average about 226Mbps and 4.4Gbps for Reno, Cubic and > CDG. They all have similar link utilization. 
> >Consists of the following patches: > >[RFC PATCH v6 net-next 1/4] tcp: replace cnt & rtt with struct in >[RFC PATCH v6 net-next 2/4] tcp: refactor struct tcp_skb_cb >[RFC PATCH v6 net-next 3/4] tcp: add in_flight to tcp_skb_cb >[RFC PATCH v6 net-next 4/4] tcp: add NV congestion control > >Signed-off-by: Lawrence Brakmo > >include/net/tcp.h | 20 ++- >net/ipv4/Kconfig| 16 ++ >net/ipv4/Makefile | 1 + >net/ipv4/tcp_bic.c | 6 +- >net/ipv4/tcp_cdg.c | 14 +- >net/ipv4/tcp_cubic.c| 6 +- >net/ipv4/tcp_htcp.c | 10 +- >net/ipv4/tcp_illinois.c | 20 +-- >net/ipv4/tcp_input.c| 10 +- >net/ipv4/tcp_lp.c | 6 +- >net/ipv4/tcp_nv.c | 489 >++ >net/ipv4/tcp_output.c | 4 +- >net/ipv4/tcp_vegas.c| 6 +- >net/ipv4/tcp_vegas.h| 2 +- >net/ipv4/tcp_veno.c | 7 +- >net/ipv4/tcp_westwood.c | 7 +- >net/ipv4/tcp_yeah.c | 7 +- >17 files changed, 580 insertions(+), 51 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 3/6] net: add netif_is_ovs_master helper with IFF_OPENVSWITCH private flag
Wed, Aug 26, 2015 at 07:43:18PM CEST, sfel...@gmail.com wrote: >On Wed, Aug 26, 2015 at 9:36 AM, Jiri Pirko wrote: >> From: Jiri Pirko >> >> Add this helper so code can easily figure out if netdev is openswitch. >> >> Signed-off-by: Jiri Pirko >> --- >> include/linux/netdevice.h| 8 >> net/openvswitch/vport-internal_dev.c | 2 +- >> 2 files changed, 9 insertions(+), 1 deletion(-) >> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >> index be625f4..0a884e6 100644 >> --- a/include/linux/netdevice.h >> +++ b/include/linux/netdevice.h >> @@ -1264,6 +1264,7 @@ struct net_device_ops { >> * @IFF_MACVLAN: Macvlan device >> * @IFF_VRF_MASTER: device is a VRF master >> * @IFF_NO_QUEUE: device can run without qdisc attached >> + * @IFF_VRF_OPENVSWITCH: device is a Open vSwitch master >> */ >> enum netdev_priv_flags { >> IFF_802_1Q_VLAN = 1<<0, >> @@ -1293,6 +1294,7 @@ enum netdev_priv_flags { >> IFF_IPVLAN_SLAVE= 1<<24, >> IFF_VRF_MASTER = 1<<25, >> IFF_NO_QUEUE= 1<<26, >> + IFF_OPENVSWITCH = 1<<27, >> }; >> >> #define IFF_802_1Q_VLANIFF_802_1Q_VLAN >> @@ -1322,6 +1324,7 @@ enum netdev_priv_flags { >> #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE >> #define IFF_VRF_MASTER IFF_VRF_MASTER >> #define IFF_NO_QUEUE IFF_NO_QUEUE >> +#define IFF_OPENVSWITCHIFF_OPENVSWITCH >> >> /** >> * struct net_device - The DEVICE structure. >> @@ -3853,6 +3856,11 @@ static inline bool netif_is_bridge_master(const >> struct net_device *dev) >> return dev->priv_flags & IFF_EBRIDGE; >> } >> >> +static inline bool netif_is_ovs_master(const struct net_device *dev) >> +{ >> + return dev->priv_flags & IFF_OPENVSWITCH; >> +} > >We're going to run out of priv_flags bits. This flag doesn't seem >like something that will be checked lots of places. How about using >rtnl_link_ops->kind to save a bit in priv_flags? 
> >static inline bool netif_is_ovs_master(const struct net_device *dev) >{ >return !strcmp(dev->rtnl_link_ops->kind, "openvswitch")); >} There are lot of helpers like this for other soft-devices. I think that is okay to have it this way. The thing is that sometimes you need to use thi helper in fast path and in that case, you do not want to strcmp. There is plenty of priv_flags bits for now when I killed the bonding stuff. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 3/6] net: add netif_is_ovs_master helper with IFF_OPENVSWITCH private flag
Wed, Aug 26, 2015 at 07:24:55PM CEST, f.faine...@gmail.com wrote: >On 26/08/15 09:36, Jiri Pirko wrote: >> From: Jiri Pirko >> >> Add this helper so code can easily figure out if netdev is openswitch. >> >> Signed-off-by: Jiri Pirko >> --- >> include/linux/netdevice.h| 8 >> net/openvswitch/vport-internal_dev.c | 2 +- >> 2 files changed, 9 insertions(+), 1 deletion(-) >> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h >> index be625f4..0a884e6 100644 >> --- a/include/linux/netdevice.h >> +++ b/include/linux/netdevice.h >> @@ -1264,6 +1264,7 @@ struct net_device_ops { >> * @IFF_MACVLAN: Macvlan device >> * @IFF_VRF_MASTER: device is a VRF master >> * @IFF_NO_QUEUE: device can run without qdisc attached >> + * @IFF_VRF_OPENVSWITCH: device is a Open vSwitch master > >Typo, the flag you introduced is named IFF_OPENVSWITCH, not IFF_VRF_OPENVSWITCH. Oops, will fix. Thanks -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 2/3] mlxsw: expose EMAD transactions statistics via debugfs
Wed, Aug 26, 2015 at 08:21:59PM CEST, sfel...@gmail.com wrote: >On Wed, Aug 26, 2015 at 10:49 AM, David Miller wrote: >> From: Jiri Pirko >> Date: Wed, 26 Aug 2015 09:37:57 +0200 >> >>> I don't think that are much more cases like this. Therefore I think that >>> for this cases, debugfs might be a good way to expose debugging stats. >> >> Scott wanted to do similar things in rocker. DSA guys too. >> >> Every switch device is going to have some kind of hierarchy like >> this, it's not a unique situation. > >We've been able to get buy so far without a user-visible device for >the switch. The switch ports are represented by netdevs, so that's >easy. How can we create an object for the switch itself, so we can >attach common interfaces for the user to dump switch-level stats or >tables? Using another netdev doesn't seem right. Do we need a new >device class for switches, and then create some common tool/interfaces >for switch device class? Switch object itselt would not help you to expose rocker internals. I don't think that you can find generic way, same for all drivers, to expose internal tables and stuff. That is hw specific. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] bnx2x: Add new device ids under the Qlogic vendor
This adds support for 3 new PCI device combinations - 1077:16a1, 1077:16a4 and 1077:16ad. Signed-off-by: Yuval Mintz --- Hi Dave, Please consider applying this to 'net-next'. Thanks, Yuval --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 26fbfcc..e3da2bd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -266,11 +266,14 @@ static const struct pci_device_id bnx2x_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57810_MF), BCM57810_MF }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_O), BCM57840_O }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_4_10), BCM57840_4_10 }, + { PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_NX2_57840_4_10), BCM57840_4_10 }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_2_20), BCM57840_2_20 }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57810_VF), BCM57810_VF }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_MFO), BCM57840_MFO }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_MF), BCM57840_MF }, + { PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_NX2_57840_MF), BCM57840_MF }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57840_VF), BCM57840_VF }, + { PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_NX2_57840_VF), BCM57840_VF }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57811), BCM57811 }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57811_MF), BCM57811_MF }, { PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_57811_VF), BCM57811_VF }, -- 1.9.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next v2] bridge: vlan: allow to suppress local mac install for all vlans
On 8/26/15, 4:33 AM, Nikolay Aleksandrov wrote: On Aug 25, 2015, at 11:06 PM, David Miller wrote: From: Nikolay Aleksandrov Date: Tue, 25 Aug 2015 22:28:16 -0700 Certainly, that should be done and I will look into it, but the essence of this patch is a bit different. The problem here is not the size of the fdb entries, it’s more the number of them - having 96000 entries (even if they were 1 byte ones) is just way too much especially when the fdb hash size is small and static. We could work on making it dynamic though, but still these type of local entries per vlan per port can easily be avoided with this option. 96000 bits can be stored in 12k. Get where I'm going with this? Look at the problem sideways. Oh okay, I misunderstood your previous comment. I’ll look into that. I just wanted to add the other problems we have had with keeping these macs (mostly from userspace POV): - add/del netlink notification storms - and large netlink dumps In addition to in-kernel optimizations, it will be nice to have a solution that reduces the burden on userspace. That will need a newer netlink dump format for fdbs. Considering all the changes needed, Nikolay's patch seems less intrusive. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 net-next 7/8] geneve: Consolidate Geneve functionality in single module.
On Wed, Aug 26, 2015 at 8:57 PM, Pravin Shelar wrote: > On Wed, Aug 26, 2015 at 8:08 PM, Jesse Gross wrote: >> On Wed, Aug 26, 2015 at 2:54 PM, Pravin B Shelar wrote: >>> @@ -293,15 +615,13 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, >>> struct net_device *dev) >> [...] >>> + err = geneve_build_skb(rt, skb, key->tun_flags, vni, >>> + info->options_len, opts, udp_csum); >>> + if (unlikely(err)) >>> + goto err; >> [...] >>> tx_error: >>> - dev->stats.tx_errors++; >>> dev_kfree_skb(skb); >>> +err: >>> + dev->stats.tx_errors++; >>> return NETDEV_TX_OK; >>> } >> >> I'm not sure that it makes sense to break out these error conditions - >> geneve_build_skb() doesn't free the skb, so it looks like there is a >> leak on failure. > > It does frees skb. Which case are you looking at? Never mind, you're right. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 net-next 7/8] geneve: Consolidate Geneve functionality in single module.
On Wed, Aug 26, 2015 at 8:08 PM, Jesse Gross wrote: > On Wed, Aug 26, 2015 at 2:54 PM, Pravin B Shelar wrote: >> diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c >> index d05150c..a36a1de 100644 >> --- a/drivers/net/geneve.c >> +++ b/drivers/net/geneve.c >> @@ -138,16 +164,18 @@ static void geneve_rx(struct geneve_sock *gs, struct >> sk_buff *skb) >> opts = ip_tunnel_info_opts(&tun_dst->u.tun_info, >>gnvh->opt_len * 4); >> memcpy(opts, gnvh->options, gnvh->opt_len * 4); >> + xnet = false; >> } else { >> /* Drop packets w/ critical options, >> * since we don't support any... >> */ >> if (gnvh->critical) >> goto drop; >> + xnet = !net_eq(geneve->net, dev_net(geneve->dev)); >> } > > Now that devices can be placed in any namespace, I think we can't > assume that xnet is false in the first case. > ok. >> @@ -293,15 +615,13 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, >> struct net_device *dev) > [...] >> + err = geneve_build_skb(rt, skb, key->tun_flags, vni, >> + info->options_len, opts, udp_csum); >> + if (unlikely(err)) >> + goto err; > [...] >> tx_error: >> - dev->stats.tx_errors++; >> dev_kfree_skb(skb); >> +err: >> + dev->stats.tx_errors++; >> return NETDEV_TX_OK; >> } > > I'm not sure that it makes sense to break out these error conditions - > geneve_build_skb() doesn't free the skb, so it looks like there is a > leak on failure. It does free the skb. Which case are you looking at? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH -next v3 2/2] smsc911x: Ignore error return from device_get_phy_mode()
Commit 62ee783bf1f8 ("smsc911x: Fix crash seen if neither ACPI nor OF is configured or used") introduces an error check for the return value from device_get_phy_mode() and bails out if there is an error. Unfortunately, there are configurations where no phy is configured. Those configurations now fail. To fix the problem, accept error returns from device_get_phy_mode(), and use the return value from device_property_read_u32() to determine if there is a suitable firmware interface to read the configuration. Fixes: 62ee783bf1f8 ("smsc911x: Fix crash seen if neither ACPI nor OF is configured or used") Tested-by: Tony Lindgren Signed-off-by: Guenter Roeck --- v2: Dropped RFC Removed check for -ENODATA Depends on patch 1/2 v3: Added Tony's Tested-by: Tested with non-devicetree and devicetree configurations. Should be tested with ACPI configuration. drivers/net/ethernet/smsc/smsc911x.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 6eef3251d833..c8b26259c9cf 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2369,23 +2369,25 @@ static int smsc911x_probe_config(struct smsc911x_platform_config *config, { int phy_interface; u32 width = 0; + int err; phy_interface = device_get_phy_mode(dev); if (phy_interface < 0) - return phy_interface; - + phy_interface = PHY_INTERFACE_MODE_NA; config->phy_interface = phy_interface; device_get_mac_address(dev, config->mac, ETH_ALEN); - device_property_read_u32(dev, "reg-shift", &config->shift); - - device_property_read_u32(dev, "reg-io-width", &width); - if (width == 4) + err = device_property_read_u32(dev, "reg-io-width", &width); + if (err == -ENXIO) + return err; + if (!err && width == 4) config->flags |= SMSC911X_USE_32BIT; else config->flags |= SMSC911X_USE_16BIT; + device_property_read_u32(dev, "reg-shift", &config->shift); + if (device_property_present(dev, "smsc,irq-active-high")) 
config->irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_HIGH; -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH -next v3 1/2] device property: Return -ENXIO if there is no suitable FW interface
Return -ENXIO if device property array access functions don't find a suitable firmware interface. This lets drivers decide if they should use available platform data instead. Cc: Rafael J. Wysocki Signed-off-by: Guenter Roeck --- v2: Added patch v3: Document that device_property_read_string_array and device_property_read_string can also return -ENXIO. Move check if there is pset data, and thus detection if there is a suitable firmware interface, out of pset_prop_read_array into the calling code. drivers/base/property.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 287704d680bf..caf937af98b3 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -156,6 +156,7 @@ EXPORT_SYMBOL_GPL(fwnode_property_present); *%-ENODATA if the property does not have a value, *%-EPROTO if the property is not an array of numbers, *%-EOVERFLOW if the size of the property is not as expected. + *%-ENXIO if no suitable firmware interface is present. */ int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval) @@ -180,6 +181,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u8_array); *%-ENODATA if the property does not have a value, *%-EPROTO if the property is not an array of numbers, *%-EOVERFLOW if the size of the property is not as expected. + *%-ENXIO if no suitable firmware interface is present. */ int device_property_read_u16_array(struct device *dev, const char *propname, u16 *val, size_t nval) @@ -204,6 +206,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u16_array); *%-ENODATA if the property does not have a value, *%-EPROTO if the property is not an array of numbers, *%-EOVERFLOW if the size of the property is not as expected. + *%-ENXIO if no suitable firmware interface is present. 
*/ int device_property_read_u32_array(struct device *dev, const char *propname, u32 *val, size_t nval) @@ -228,6 +231,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u32_array); *%-ENODATA if the property does not have a value, *%-EPROTO if the property is not an array of numbers, *%-EOVERFLOW if the size of the property is not as expected. + *%-ENXIO if no suitable firmware interface is present. */ int device_property_read_u64_array(struct device *dev, const char *propname, u64 *val, size_t nval) @@ -252,6 +256,7 @@ EXPORT_SYMBOL_GPL(device_property_read_u64_array); *%-ENODATA if the property does not have a value, *%-EPROTO or %-EILSEQ if the property is not an array of strings, *%-EOVERFLOW if the size of the property is not as expected. + *%-ENXIO if no suitable firmware interface is present. */ int device_property_read_string_array(struct device *dev, const char *propname, const char **val, size_t nval) @@ -273,6 +278,7 @@ EXPORT_SYMBOL_GPL(device_property_read_string_array); *%-EINVAL if given arguments are not valid, *%-ENODATA if the property does not have a value, *%-EPROTO or %-EILSEQ if the property type is not a string. + *%-ENXIO if no suitable firmware interface is present. 
*/ int device_property_read_string(struct device *dev, const char *propname, const char **val) @@ -294,9 +300,11 @@ EXPORT_SYMBOL_GPL(device_property_read_string); else if (is_acpi_node(_fwnode_)) \ _ret_ = acpi_dev_prop_read(to_acpi_node(_fwnode_), _propname_, \ _proptype_, _val_, _nval_); \ - else \ + else if (is_pset(_fwnode_)) \ _ret_ = pset_prop_read_array(to_pset(_fwnode_), _propname_, \ _proptype_, _val_, _nval_); \ + else \ + _ret_ = -ENXIO; \ _ret_; \ }) @@ -434,9 +442,10 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode, else if (is_acpi_node(fwnode)) return acpi_dev_prop_read(to_acpi_node(fwnode), propname, DEV_PROP_STRING, val, nval); - - return pset_prop_read_array(to_pset(fwnode), propname, - DEV_PROP_STRING, val, nval); + else if (is_pset(fwnode)) + return pset_prop_read_array(to_pset(fwnode), propname, + DEV_PROP_STRING, val, nval); + return -ENXIO; } EXPORT_SYMBOL_GPL(fwnode_property_read_string_array); -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.
Re: [PATCH v4 net-next 7/8] geneve: Consolidate Geneve functionality in single module.
On Wed, Aug 26, 2015 at 2:54 PM, Pravin B Shelar wrote: > diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c > index d05150c..a36a1de 100644 > --- a/drivers/net/geneve.c > +++ b/drivers/net/geneve.c > @@ -138,16 +164,18 @@ static void geneve_rx(struct geneve_sock *gs, struct > sk_buff *skb) > opts = ip_tunnel_info_opts(&tun_dst->u.tun_info, >gnvh->opt_len * 4); > memcpy(opts, gnvh->options, gnvh->opt_len * 4); > + xnet = false; > } else { > /* Drop packets w/ critical options, > * since we don't support any... > */ > if (gnvh->critical) > goto drop; > + xnet = !net_eq(geneve->net, dev_net(geneve->dev)); > } Now that devices can be placed in any namespace, I think we can't assume that xnet is false in the first case. > @@ -293,15 +615,13 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, > struct net_device *dev) [...] > + err = geneve_build_skb(rt, skb, key->tun_flags, vni, > + info->options_len, opts, udp_csum); > + if (unlikely(err)) > + goto err; [...] > tx_error: > - dev->stats.tx_errors++; > dev_kfree_skb(skb); > +err: > + dev->stats.tx_errors++; > return NETDEV_TX_OK; > } I'm not sure that it makes sense to break out these error conditions - geneve_build_skb() doesn't free the skb, so it looks like there is a leak on failure. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] net: Check frag_lists first to prevent data out of order
On Wed, 2015-08-26 at 19:12 -0700, Eric Dumazet wrote: > On Thu, 2015-08-27 at 08:56 +0800, chenweil...@huawei.com wrote: > > From: Weilong Chen > > > > When try to merge several skbs to prior one, if the frag_list is > > used and the the last one is a small packet, once the condition > > "len <= skb_tailroom(to)" is satisfied, we will get a wrong > > packet! > > This patch just check frag_lists before the condtion to prevent > > this from happening. > > > > Signed-off-by: Weilong Chen > > --- > > net/core/skbuff.c | 6 +++--- > > 1 file changed, 3 insertions(+), 3 deletions(-) > > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > index 8a725cc..d08edcb 100644 > > --- a/net/core/skbuff.c > > +++ b/net/core/skbuff.c > > @@ -4133,6 +4133,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct > > sk_buff *from, > > if (skb_cloned(to)) > > return false; > > > > + if (skb_has_frag_list(to) || skb_has_frag_list(from)) > > + return false; > > + > > if (len <= skb_tailroom(to)) { > > if (len) > > BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); > > @@ -4140,9 +4143,6 @@ bool skb_try_coalesce(struct sk_buff *to, struct > > sk_buff *from, > > return true; > > } > > > > - if (skb_has_frag_list(to) || skb_has_frag_list(from)) > > - return false; > > - > > if (skb_headlen(from) != 0) { > > struct page *page; > > unsigned int offset; > > Sigh. > > No idea what problem you tried to solve. > > This patch is not needed. > > If (len <= skb_tailroom()), then it is obviously correct to copy_bits() > the bytes. > > Hints : > > - If @to has a fraglist, then skb_tailroom(to) is 0 so the copy can not > be done. > > - If @from has a fraglist, it is not relevant as we copy it into @to and > will free @from. This is going to be a FAQ http://www.spinics.net/lists/netdev/msg315090.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] net: Check frag_lists first to prevent data out of order
On Thu, 2015-08-27 at 08:56 +0800, chenweil...@huawei.com wrote: > From: Weilong Chen > > When try to merge several skbs to prior one, if the frag_list is > used and the the last one is a small packet, once the condition > "len <= skb_tailroom(to)" is satisfied, we will get a wrong > packet! > This patch just check frag_lists before the condtion to prevent > this from happening. > > Signed-off-by: Weilong Chen > --- > net/core/skbuff.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index 8a725cc..d08edcb 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -4133,6 +4133,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct > sk_buff *from, > if (skb_cloned(to)) > return false; > > + if (skb_has_frag_list(to) || skb_has_frag_list(from)) > + return false; > + > if (len <= skb_tailroom(to)) { > if (len) > BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); > @@ -4140,9 +4143,6 @@ bool skb_try_coalesce(struct sk_buff *to, struct > sk_buff *from, > return true; > } > > - if (skb_has_frag_list(to) || skb_has_frag_list(from)) > - return false; > - > if (skb_headlen(from) != 0) { > struct page *page; > unsigned int offset; Sigh. No idea what problem you tried to solve. This patch is not needed. If (len <= skb_tailroom()), then it is obviously correct to copy_bits() the bytes. Hints : - If @to has a fraglist, then skb_tailroom(to) is 0 so the copy can not be done. - If @from has a fraglist, it is not relevant as we copy it into @to and will free @from. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 net-next 5/8] geneve: Add support to collect tunnel metadata.
On Wed, Aug 26, 2015 at 2:54 PM, Pravin B Shelar wrote: > Following patch create new tunnel flag which enable > tunnel metadata collection on given device. These devices > can be used by tunnel metadata based routing or by OVS. > Geneve Consolidation patch get rid of collect_md_tun to > simplify tunnel lookup further. > > Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [Question] Usage of dev_hold()/dev_put()
Very clear explanation, thank you! :-) Zhangjie -Original Message- From: Eric Dumazet [mailto:eric.duma...@gmail.com] Sent: Wednesday, August 26, 2015 9:10 PM To: Zhangjie (HZ) Cc: Jason Wang; netdev@vger.kernel.org; Qinchuanyu; Yewudi; liuyongan 00175866; Wangbicheng; Yan Chen Subject: Re: [Question] Usage of dev_hold()/dev_put() On Wed, 2015-08-26 at 07:48 +, Zhangjie (HZ) wrote: > Eric, > Thank you for your patient apply. > There is still a question, > In receive path, driver does not call dev_hold(), when skb goes to host > stack, skb->dev is likely to be used. > If device is destroyed before that, it seems dangerous. This is also handled properly. Check : flush_backlog() in net/core/dev.c sock_queue_rcv_skb() , and all functions setting skb->dev to NULL
[PATCH net-next] net: Check frag_lists first to prevent data out of order
From: Weilong Chen When trying to merge several skbs to prior one, if the frag_list is used and the last one is a small packet, once the condition "len <= skb_tailroom(to)" is satisfied, we will get a wrong packet! This patch just checks frag_lists before the condition to prevent this from happening. Signed-off-by: Weilong Chen --- net/core/skbuff.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8a725cc..d08edcb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4133,6 +4133,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (skb_cloned(to)) return false; + if (skb_has_frag_list(to) || skb_has_frag_list(from)) + return false; + if (len <= skb_tailroom(to)) { if (len) BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); @@ -4140,9 +4143,6 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } - if (skb_has_frag_list(to) || skb_has_frag_list(from)) - return false; - if (skb_headlen(from) != 0) { struct page *page; unsigned int offset; -- 1.7.12 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -next v2 1/2] device property: Return -ENXIO if there is no suitable FW interface
On Wednesday, August 26, 2015 04:25:59 PM Guenter Roeck wrote: > On 08/26/2015 04:37 PM, Rafael J. Wysocki wrote: > > On Wednesday, August 26, 2015 01:20:44 PM Guenter Roeck wrote: > >> Return -ENXIO if device property array access functions don't find > >> a suitable firmware interface. > >> > >> This lets drivers decide if they should use available platform data > >> instead. > >> > >> Cc: Rafael J. Wysocki > >> Signed-off-by: Guenter Roeck > >> --- > >> v2: Added patch > >> > >> drivers/base/property.c | 7 +++ > >> 1 file changed, 7 insertions(+) > >> > >> diff --git a/drivers/base/property.c b/drivers/base/property.c > >> index 287704d680bf..9600b824d138 100644 > >> --- a/drivers/base/property.c > >> +++ b/drivers/base/property.c > >> @@ -69,6 +69,9 @@ static int pset_prop_read_array(struct property_set > >> *pset, const char *name, > >>struct property_entry *prop; > >>unsigned int item_size; > >> > >> + if (!pset) > >> + return -ENXIO; > >> + > > > > This isn't exactly straightforward, because it relies on the fact that > > pset_prop_read_array() is the last thing tried by FWNODE_PROP_READ_ARRAY() > > and fwnode_property_read_string_array(). A comment about that might be > > helpful. > > > I see two options: Add a comment above, or change the calling code to > > ... > else if (is_pset(fwnode)) > return pset_prop_read_array(to_pset(fwnode), propname, > DEV_PROP_STRING, val, nval); > return -ENXIO; > > which would make it obvious and avoid side effects if the code is changed > later on. Would you be ok with this ? The second option is obviously cleaner to me and I prefer cleaner code. :-) Thanks, Rafael -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] bpf: fix bpf_skb_set_tunnel_key() helper
From: Alexei Starovoitov Date: Wed, 26 Aug 2015 15:57:38 -0700 > Make sure to indicate to tunnel driver that key.tun_id is set, > otherwise gre won't recognize the metadata. > > Fixes: d3aa45ce6b94 ("bpf: add helpers to access tunnel metadata") > Signed-off-by: Alexei Starovoitov > --- > With this fix both vxlan and gretap are working with tc+bpf. Applied, thanks. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] vrf: Add ethernet header for pass through VRF device
From: David Ahern Date: Wed, 26 Aug 2015 12:36:15 -0700 > As such all we need is to push an eth header to the front of the skb > for 1 loop through the stack and eth_header via dev_hard_header with > NULL daddr is the simplest path to accomplish that. Any other path is > just extra overhead. And dev_hard_header() is full of conditional code and partial stores, whereas the hard header cache is a _SINGLE UNCONDITIONAL MEMCPY_. You're making this data path more expensive than it needs to be just to placate features which in no way are default situations. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch net-next 2/3] mlxsw: expose EMAD transactions statistics via debugfs
From: Scott Feldman Date: Wed, 26 Aug 2015 11:21:59 -0700 > Using another netdev doesn't seem right. Do we need a new device > class for switches, and then create some common tool/interfaces for > switch device class? This is probably what we will need to do. There has also been a discussion lately about making light weight netdev objects, of which these new switch things can be a super-class of. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch net-next 4/5] net_sched: forbid setting default qdisc to inappropriate ones
On Wed, Aug 26, 2015 at 5:08 PM, Stephen Hemminger wrote: > On Wed, 26 Aug 2015 15:41:26 -0700 > Cong Wang wrote: > >> Currently there is no check for if a qdisc is appropriate >> to be used as the default qdisc. This causes we get no >> error even we set the default qdisc to an inappropriate one >> but an error will be shown up later. This is not good. >> >> Also, for qdisc's like HTB, kernel will just crash when >> we use it as default qdisc, because some data structures are >> not even initialized yet before checking opt == NULL, the cleanup >> doing ->reset() or ->destroy() on them will just crash. > > Why not fix the buggy one's instead? They are not exactly buggy, since they are fine in other ->init() calling cases. As in the first paragraph you quoted from me, it is more like a usability issue, for example ingress qdisc can be set as default without any error at any time. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch net-next 4/5] net_sched: forbid setting default qdisc to inappropriate ones
On Wed, 26 Aug 2015 15:41:26 -0700 Cong Wang wrote: > Currently there is no check for if a qdisc is appropriate > to be used as the default qdisc. This causes we get no > error even we set the default qdisc to an inappropriate one > but an error will be shown up later. This is not good. > > Also, for qdisc's like HTB, kernel will just crash when > we use it as default qdisc, because some data structures are > not even initialized yet before checking opt == NULL, the cleanup > doing ->reset() or ->destroy() on them will just crash. Why not fix the buggy ones instead? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PULL nf-next] Second Round of IPVS Updates for v4.3
On Wed, Aug 26, 2015 at 08:41:50PM +0200, Pablo Neira Ayuso wrote: > On Fri, Aug 21, 2015 at 09:23:38AM -0700, Simon Horman wrote: > > Hi Pablo, > > > > please consider these IPVS Updates for v4.3. > > > > I realise these are a little late in the cycle, so if you would prefer > > me to repost them for v4.4 then just let me know. > > Pulled, thanks Simon. > > Let me see if this gets into this merge window, otherwise I'll keep it > in my tree and will submit in the next merge window. Thanks, and sorry once again for being somewhat late. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -next v2 1/2] device property: Return -ENXIO if there is no suitable FW interface
On 08/26/2015 04:37 PM, Rafael J. Wysocki wrote: On Wednesday, August 26, 2015 01:20:44 PM Guenter Roeck wrote: Return -ENXIO if device property array access functions don't find a suitable firmware interface. This lets drivers decide if they should use available platform data instead. Cc: Rafael J. Wysocki Signed-off-by: Guenter Roeck --- v2: Added patch drivers/base/property.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/base/property.c b/drivers/base/property.c index 287704d680bf..9600b824d138 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -69,6 +69,9 @@ static int pset_prop_read_array(struct property_set *pset, const char *name, struct property_entry *prop; unsigned int item_size; + if (!pset) + return -ENXIO; + This isn't exactly straightforward, because it relies on the fact that pset_prop_read_array() is the last thing tried by FWNODE_PROP_READ_ARRAY() and fwnode_property_read_string_array(). A comment about that might be helpful. I see two options: Add a comment above, or change the calling code to ... else if (is_pset(fwnode)) return pset_prop_read_array(to_pset(fwnode), propname, DEV_PROP_STRING, val, nval); return -ENXIO; which would make it obvious and avoid side effects if the code is changed later on. Would you be ok with this ? Thanks, Guenter -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -next v2 1/2] device property: Return -ENXIO if there is no suitable FW interface
On Wednesday, August 26, 2015 01:20:44 PM Guenter Roeck wrote: > Return -ENXIO if device property array access functions don't find > a suitable firmware interface. > > This lets drivers decide if they should use available platform data > instead. > > Cc: Rafael J. Wysocki > Signed-off-by: Guenter Roeck > --- > v2: Added patch > > drivers/base/property.c | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/drivers/base/property.c b/drivers/base/property.c > index 287704d680bf..9600b824d138 100644 > --- a/drivers/base/property.c > +++ b/drivers/base/property.c > @@ -69,6 +69,9 @@ static int pset_prop_read_array(struct property_set *pset, > const char *name, > struct property_entry *prop; > unsigned int item_size; > > + if (!pset) > + return -ENXIO; > + This isn't exactly straightforward, because it relies on the fact that pset_prop_read_array() is the last thing tried by FWNODE_PROP_READ_ARRAY() and fwnode_property_read_string_array(). A comment about that might be helpful. Thanks, Rafael -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] bpf: fix bpf_skb_set_tunnel_key() helper
Make sure to indicate to tunnel driver that key.tun_id is set, otherwise gre won't recognize the metadata. Fixes: d3aa45ce6b94 ("bpf: add helpers to access tunnel metadata") Signed-off-by: Alexei Starovoitov --- With this fix both vxlan and gretap are working with tc+bpf. net/core/filter.c |1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index b4adc961413f..66500d490995 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1528,6 +1528,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) info = &md->u.tun_info; info->mode = IP_TUNNEL_INFO_TX; + info->key.tun_flags = TUNNEL_KEY; info->key.tun_id = cpu_to_be64(from->tunnel_id); info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [net-next v2 01/14] i40e: don't degrade __le16
On Wed, 2015-08-26 at 15:49 -0700, Jeff Kirsher wrote: > diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c > b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c [] > @@ -2106,11 +2106,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device > *netdev, [] > dev_err(&pf->pdev->dev, > "VF %d has already configured VLAN filters and the > administrator is requesting a port VLAN override.\nPlease unload and reload > the VF driver for this change to take effect.\n", > vf_id); Unrelated trivia: This might be better with separate dev_err calls so there is a consistent prefix in the logging output. dev_err(&pf->pdev->dev, "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\n", vf_id); dev_err(&pf->pdev->dev, "Please unload and reload the VF driver for this change to take effect.\n"); -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 02/14] i40e: add RX to port CRC errors label
From: Shannon Nelson The port.crc_errors is really an RX counter, so let's mark it as such. Change-ID: I179afd3f8a95d45229bb4163a6aeb01f0d2d250b Signed-off-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 83d41c2..d6c7eb7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -114,7 +114,7 @@ static struct i40e_stats i40e_gstrings_stats[] = { I40E_PF_STAT("tx_errors", stats.eth.tx_errors), I40E_PF_STAT("rx_dropped", stats.eth.rx_discards), I40E_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down), - I40E_PF_STAT("crc_errors", stats.crc_errors), + I40E_PF_STAT("rx_crc_errors", stats.crc_errors), I40E_PF_STAT("illegal_bytes", stats.illegal_bytes), I40E_PF_STAT("mac_local_faults", stats.mac_local_faults), I40E_PF_STAT("mac_remote_faults", stats.mac_remote_faults), -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 07/14] i40e: Fix comment for ethtool diagnostic link test
From: Greg Rose The existing comment is incorrect. Add new comment to point out that the PF reset does not affect link but if the reset is changed to a different type that does affect link then the link test would need to be moved to before the reset. Change-ID: I28d786f46e9465860babdee61c1dba51016464df Reported-by: Jeremiah Kyle Signed-off-by: Greg Rose Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 74c16a1..62824f5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1626,11 +1626,13 @@ static void i40e_diag_test(struct net_device *netdev, /* indicate we're in test mode */ dev_close(netdev); else + /* This reset does not affect link - if it is +* changed to a type of reset that does affect +* link then the following link test would have +* to be moved to before the reset +*/ i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); - /* Link test performed before hardware reset -* so autoneg doesn't interfere with test result -*/ if (i40e_link_test(netdev, &data[I40E_ETH_TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 01/14] i40e: don't degrade __le16
From: Mitch Williams Sparse cries when we compare an __le16 to a u16, almost like it cares about architectures other than x86. Weird. Use the le16_to_cpu macro to make it stop crying. Change-ID: Id068f4d7868a2d3df234a791a76d15938f37db35 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c| 2 ++ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3bb832a..ded62eb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1255,6 +1255,8 @@ struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr, struct i40e_mac_filter *f; list_for_each_entry(f, &vsi->mac_filter_list, list) { + if (vsi->info.pvid) + f->vlan = le16_to_cpu(vsi->info.pvid); if (!i40e_find_filter(vsi, macaddr, f->vlan, is_vf, is_netdev)) { if (!i40e_add_filter(vsi, macaddr, f->vlan, diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 8a7607c..70a6fb1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2106,11 +2106,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, goto error_pvid; } - if (vsi->info.pvid == (vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT))) + if (le16_to_cpu(vsi->info.pvid) == + (vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT))) /* duplicate request, so just return success */ goto error_pvid; - if (vsi->info.pvid == 0 && i40e_is_vsi_in_vlan(vsi)) { + if (le16_to_cpu(vsi->info.pvid) == 0 && i40e_is_vsi_in_vlan(vsi)) { dev_err(&pf->pdev->dev, "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", vf_id); -- 
2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 08/14] i40e: correct spelling error
From: Mitch Williams Turns out that 'inavlid' is an inavlid spelling for 'invalid'. Change-ID: Ie1fe2d0f8d1ba75ab880594875ec2e4152a76f61 Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 62824f5..e972b5e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2546,7 +2546,7 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, * @indir: indirection table * @key: hash key * - * Returns -EINVAL if the table specifies an inavlid queue id, otherwise + * Returns -EINVAL if the table specifies an invalid queue id, otherwise * returns 0 after programming the table. **/ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 00/14][pull request] Intel Wired LAN Driver Updates 2015-08-26
This series contains updates to i40e and i40evf only. Anjali provides a fix for i40e where the part is not receiving multicast or VLAN tagged packets when in promiscuous mode. This can occur when a software bridge is created on top of the device. She also fixes the legacy and MSI interrupt modes in the driver, support for which was non-existent before since we were assuming MSIX was the only mode that the driver ran in, and fixes the i40evf driver, where the wrong defines were getting used for the VF registers. Mitch fixes a sparse warning about comparing __le16 to u16 by using le16_to_cpu(), and also fixes a misspelling of "invalid". Shannon adds an "rx_" prefix to the port.crc_errors statistic label, since it is a receive counter. Catherine provides a fix to move the stopping of the service task and flow director to i40e_shutdown() instead of i40e_suspend(). Greg fixes the ethtool offline diagnostic with netqueues, which just need to be treated the same as virtual functions when someone wants to run the ethtool offline diagnostic test. He also fixes up the code comments for the i40e ethtool diagnostic test function, and cleans up redundant and unneeded messages: the kernel notifies all VXLAN capable registered drivers of port additions, so there is no need to log them. Neerav adds the ability to update statistics per VEB per traffic class and dump them via ethtool. Jingjing adds support for virtual channel offload to support receive polling mode in the VF driver. v2: dropped the patch which added helper functions into a header; feedback from David Miller was to make the functions constant to reduce the driver footprint, so the patch is removed while Anjali works on making the requested changes. 
The following are changes since commit 8c5bbe77d4cd012668cdaf501bbd1cbfb9ad1d24: Merge branch 'act_bpf_lockless' and are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue master Anjali Singhai Jain (4): i40e: Fix legacy interrupt mode in the driver i40evf: Use the correct defines to match the VF registers i40evf: Remove PF specific register definitions from the VF i40e/i40evf: add VIRTCHNL_VF_OFFLOAD flag Catherine Sullivan (2): i40e: Move function calls to i40e_shutdown instead of i40e_suspend i40e/i40evf: Bump i40e to 1.3.9 and i40evf to 1.3.5 Greg Rose (3): i40e: Fix ethtool offline diagnostic with netqueues i40e: Fix comment for ethtool diagnostic link test i40e: Remove redundant and unneeded messages Mitch Williams (2): i40e: don't degrade __le16 i40e: correct spelling error Neerav Parikh (2): i40e/i40evf: Add capability to gather VEB per TC stats i40e/i40evf: Cache the CEE TLV status returned from firmware Shannon Nelson (1): i40e: add RX to port CRC errors label drivers/net/ethernet/intel/i40e/i40e.h |2 + drivers/net/ethernet/intel/i40e/i40e_dcb.c |4 + drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 54 +- drivers/net/ethernet/intel/i40e/i40e_main.c| 66 +- drivers/net/ethernet/intel/i40e/i40e_register.h|7 + drivers/net/ethernet/intel/i40e/i40e_type.h|9 + drivers/net/ethernet/intel/i40e/i40e_virtchnl.h|1 + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 19 +- drivers/net/ethernet/intel/i40evf/i40e_adminq.c| 17 +- drivers/net/ethernet/intel/i40evf/i40e_common.c|2 +- drivers/net/ethernet/intel/i40evf/i40e_register.h | 3093 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 28 +- drivers/net/ethernet/intel/i40evf/i40e_type.h |9 + drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h |1 + drivers/net/ethernet/intel/i40evf/i40evf_main.c| 42 +- 15 files changed, 188 insertions(+), 3166 deletions(-) -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to 
majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 09/14] i40evf: Use the correct defines to match the VF registers
From: Anjali Singhai Jain Use CTLN1 instead of CTLN for the VF relative register space. Change-ID: Iefba63faf0307af55fec8dbb64f26059f7d91318 Signed-off-by: Anjali Singhai Jain Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 28 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 38 - 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 7309479..7e91d82 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1293,17 +1293,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, old_itr = q_vector->rx.itr; i40e_set_new_dynamic_itr(&q_vector->rx); if (old_itr != q_vector->rx.itr) { - val = I40E_VFINT_DYN_CTLN_INTENA_MASK | - I40E_VFINT_DYN_CTLN_CLEARPBA_MASK | + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (I40E_RX_ITR << - I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT) | + I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | (q_vector->rx.itr << - I40E_VFINT_DYN_CTLN_INTERVAL_SHIFT); + I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); } else { - val = I40E_VFINT_DYN_CTLN_INTENA_MASK | - I40E_VFINT_DYN_CTLN_CLEARPBA_MASK | + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (I40E_ITR_NONE << - I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT); + I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT); } if (!test_bit(__I40E_DOWN, &vsi->state)) wr32(hw, I40E_VFINT_DYN_CTLN1(vector - 1), val); @@ -1315,18 +1315,18 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, old_itr = q_vector->tx.itr; i40e_set_new_dynamic_itr(&q_vector->tx); if (old_itr != q_vector->tx.itr) { - val = I40E_VFINT_DYN_CTLN_INTENA_MASK | - I40E_VFINT_DYN_CTLN_CLEARPBA_MASK | + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (I40E_TX_ITR << - I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT) | + I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | 
(q_vector->tx.itr << - I40E_VFINT_DYN_CTLN_INTERVAL_SHIFT); + I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); } else { - val = I40E_VFINT_DYN_CTLN_INTENA_MASK | - I40E_VFINT_DYN_CTLN_CLEARPBA_MASK | + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (I40E_ITR_NONE << - I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT); + I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT); } if (!test_bit(__I40E_DOWN, &vsi->state)) wr32(hw, I40E_VFINT_DYN_CTLN1(vector - 1), val); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 2a6063a..c2ba40f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -204,7 +204,7 @@ static void i40evf_misc_irq_enable(struct i40evf_adapter *adapter) wr32(hw, I40E_VFINT_DYN_CTL01, I40E_VFINT_DYN_CTL01_INTENA_MASK | I40E_VFINT_DYN_CTL01_ITR_INDX_MASK); - wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA_ADMINQ_MASK); + wr32(hw, I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK); /* read flush */ rd32(hw, I40E_VFGEN_RSTAT); @@ -245,7 +245,7 @@ void i40evf_irq_enable_queues(struct i40evf_adapter *adapter, u32 mask) wr32(hw, I40E_VFINT_DYN_CTLN1(i - 1), I40E_VFINT_DYN_CTLN1_INTENA_MASK | I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | -I40E_VFINT_DYN_CTLN_CLEARPBA_MASK); +I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK); } } } @@ -263,17 +263,17 @@ static void i40evf_fire_sw_int(struct i40evf_adapter *adapter, u32 mask) if (mask & 1) { dyn_ctl = rd32(hw, I40E_VFINT_D
[net-next v2 05/14] i40e: Fix ethtool offline diagnostic with netqueues
From: Greg Rose Treat netqueues the same way we do virtual functions when someone wants to run the ethtool offline diagnostic test. Change-ID: Id48d2b933f1fd0db7be06305a93c6ebe3dc821f5 Signed-off-by: Greg Rose Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index d6c7eb7..230d127 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1559,6 +1559,21 @@ static inline bool i40e_active_vfs(struct i40e_pf *pf) return false; } +static inline bool i40e_active_vmdqs(struct i40e_pf *pf) +{ + struct i40e_vsi **vsi = pf->vsi; + int i; + + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (!vsi[i]) + continue; + if (vsi[i]->type == I40E_VSI_VMDQ2) + return true; + } + + return false; +} + static void i40e_diag_test(struct net_device *netdev, struct ethtool_test *eth_test, u64 *data) { @@ -1572,9 +1587,9 @@ static void i40e_diag_test(struct net_device *netdev, set_bit(__I40E_TESTING, &pf->state); - if (i40e_active_vfs(pf)) { + if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) { dev_warn(&pf->pdev->dev, -"Please take active VFS offline and restart the adapter before running NIC diagnostics\n"); +"Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); data[I40E_ETH_TEST_REG] = 1; data[I40E_ETH_TEST_EEPROM] = 1; data[I40E_ETH_TEST_INTR]= 1; -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 11/14] i40e: Remove redundant and unneeded messages
From: Greg Rose The kernel notifies all VXLAN capable registered drivers, i.e. any driver that implements ndo_add_vxlan_port(), of the addition of a port so that the driver can track which ports are in use. There's no need to log this - it just fills the system log with useless and irksome noise. Also, when failing to init SR-IOV interfaces the driver was printing the same message twice. Just remove the inner printk and let the outer message catch enable as well as the other failures. Change-ID: Id5ecb1d425c2a357ee2bc1635dab24553831dade Signed-off-by: Greg Rose Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c| 2 -- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c0bfb55..5b60558 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8068,8 +8068,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev, pf->vxlan_ports[next_idx] = port; pf->pending_vxlan_bitmap |= BIT_ULL(next_idx); pf->flags |= I40E_FLAG_VXLAN_FILTER_SYNC; - - dev_info(&pf->pdev->dev, "adding vxlan port %d\n", ntohs(port)); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 70a6fb1..ca7a568 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -921,8 +921,6 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) if (pci_num_vf(pf->pdev) != num_alloc_vfs) { ret = pci_enable_sriov(pf->pdev, num_alloc_vfs); if (ret) { - dev_err(&pf->pdev->dev, - "Failed to enable SR-IOV, error %d.\n", ret); pf->num_alloc_vfs = 0; goto err_iov; } -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org 
More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 12/14] i40e/i40evf: add VIRTCHNL_VF_OFFLOAD flag
From: Anjali Singhai Jain Add virtual channel offload capability to support RX polling mode in the VF. Change-ID: Ib643ae2a7506dfc75fc489fc207493fabefa4832 Signed-off-by: Jingjing Wu Signed-off-by: Anjali Singhai Jain Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_register.h| 7 +++ drivers/net/ethernet/intel/i40e/i40e_virtchnl.h| 1 + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 12 drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h | 1 + 4 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index acae6c74..dc0402f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -873,6 +873,13 @@ #define I40E_PFINT_CEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_PFINT_CEQCTL_CAUSE_ENA_SHIFT) #define I40E_PFINT_CEQCTL_INTEVENT_SHIFT 31 #define I40E_PFINT_CEQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_CEQCTL_INTEVENT_SHIFT) +#define I40E_GLINT_CTL 0x0003F800 /* Reset: CORER */ +#define I40E_GLINT_CTL_DIS_AUTOMASK_PF0_SHIFT 0 +#define I40E_GLINT_CTL_DIS_AUTOMASK_PF0_MASK I40E_MASK(0x1, I40E_GLINT_CTL_DIS_AUTOMASK_PF0_SHIFT) +#define I40E_GLINT_CTL_DIS_AUTOMASK_VF0_SHIFT 1 +#define I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK I40E_MASK(0x1, I40E_GLINT_CTL_DIS_AUTOMASK_VF0_SHIFT) +#define I40E_GLINT_CTL_DIS_AUTOMASK_N_SHIFT 2 +#define I40E_GLINT_CTL_DIS_AUTOMASK_N_MASK I40E_MASK(0x1, I40E_GLINT_CTL_DIS_AUTOMASK_N_SHIFT) #define I40E_PFINT_DYN_CTL0 0x00038480 /* Reset: PFR */ #define I40E_PFINT_DYN_CTL0_INTENA_SHIFT 0 #define I40E_PFINT_DYN_CTL0_INTENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_INTENA_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index a7ab463..0f8d415 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -152,6 +152,7 @@ struct 
i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ0x0008 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG 0x0010 #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x0001 +#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING0x0002 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index ca7a568..d99c116 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -335,6 +335,18 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, wr32(hw, reg_idx, reg); } + /* if the vf is running in polling mode and using interrupt zero, +* need to disable auto-mask on enabling zero interrupt for VFs. +*/ + if ((vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) && + (vector_id == 0)) { + reg = rd32(hw, I40E_GLINT_CTL); + if (!(reg & I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK)) { + reg |= I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK; + wr32(hw, I40E_GLINT_CTL, reg); + } + } + irq_list_done: i40e_flush(hw); } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index 1e89dea..e6db20e 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -152,6 +152,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ0x0008 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG 0x0010 #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x0001 +#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING0x0002 struct i40e_virtchnl_vf_resource { u16 num_vsis; -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 04/14] i40e: Fix legacy interrupt mode in the driver
From: Anjali Singhai Jain This patch fixes the driver flow to take into account legacy interrupts. Over time we added code that assumes MSIX is the only mode that the driver runs in. It also enables a legacy workaround to trigger SWINT when the TX ring has non-cache aligned descriptors pending and interrupts are disabled. We work with a single vector in MSI mode too, so apply the same restrictions as Legacy. Change-ID: I826ddff1f9bd45d2dbe11f56a3ddcef0dbf42563 Signed-off-by: Anjali Singhai Jain Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 07bfe61..481f427 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1550,7 +1550,10 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, * vectors available and so we need to lower the used * q count. 
*/ - qcount = min_t(int, vsi->alloc_queue_pairs, pf->num_lan_msix); + if (pf->flags & I40E_FLAG_MSIX_ENABLED) + qcount = min_t(int, vsi->alloc_queue_pairs, pf->num_lan_msix); + else + qcount = vsi->alloc_queue_pairs; num_tc_qps = qcount / numtc; num_tc_qps = min_t(int, num_tc_qps, i40e_pf_get_max_q_per_tc(pf)); @@ -1614,7 +1617,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) { if (vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; - else + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_queue_pairs = pf->num_lan_msix; } @@ -3416,7 +3419,7 @@ static irqreturn_t i40e_fdir_clean_ring(int irq, void *data) * @v_idx: vector index * @qp_idx: queue pair index **/ -static void map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx) +static void i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx]; @@ -3470,7 +3473,7 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi) q_vector->tx.ring = NULL; while (num_ringpairs--) { - map_vector_to_qp(vsi, v_start, qp_idx); + i40e_map_vector_to_qp(vsi, v_start, qp_idx); qp_idx++; qp_remaining--; } @@ -8798,6 +8801,11 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi) goto vector_setup_out; } + /* In Legacy mode, we do not have to get any other vector since we +* piggyback on the misc/ICR0 for queue interrupts. + */ + if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + return ret; if (vsi->num_q_vectors) vsi->base_vector = i40e_get_lump(pf, pf->irq_pile, vsi->num_q_vectors, vsi->idx); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 03/14] i40e: Move function calls to i40e_shutdown instead of i40e_suspend
From: Catherine Sullivan We should be stopping the service task and flow director on shutdown not on suspension. Signed-off-by: Catherine Sullivan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 +--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ded62eb..07bfe61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10450,6 +10450,19 @@ static void i40e_shutdown(struct pci_dev *pdev) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + del_timer_sync(&pf->service_timer); + cancel_work_sync(&pf->service_task); + i40e_fdir_teardown(pf); + + rtnl_lock(); + i40e_prep_for_reset(pf); + rtnl_unlock(); + + wr32(hw, I40E_PFPM_APM, +(pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); + wr32(hw, I40E_PFPM_WUFC, +(pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + i40e_clear_interrupt_scheme(pf); if (system_state == SYSTEM_POWER_OFF) { @@ -10470,9 +10483,6 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) set_bit(__I40E_SUSPENDED, &pf->state); set_bit(__I40E_DOWN, &pf->state); - del_timer_sync(&pf->service_timer); - cancel_work_sync(&pf->service_task); - i40e_fdir_teardown(pf); rtnl_lock(); i40e_prep_for_reset(pf); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 14/14] i40e/i40evf: Bump i40e to 1.3.9 and i40evf to 1.3.5
From: Catherine Sullivan Bump version and update the copyright year for i40evf. Change-ID: Iddb81b9dba09f0dc57ab54937b5821ecdd721ff6 Signed-off-by: Catherine Sullivan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5b60558..a97f193 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -39,7 +39,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 6 +#define DRV_VERSION_BUILD 9 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD)DRV_KERN diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index c2ba40f..e85849b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -34,10 +34,10 @@ char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = "Intel(R) XL710/X710 Virtual Function Network Driver"; -#define DRV_VERSION "1.3.2" +#define DRV_VERSION "1.3.5" const char i40evf_driver_version[] = DRV_VERSION; static const char i40evf_copyright[] = - "Copyright (c) 2013 - 2014 Intel Corporation."; + "Copyright (c) 2013 - 2015 Intel Corporation."; /* i40evf_pci_tbl - PCI Device ID Table * -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 13/14] i40e/i40evf: Cache the CEE TLV status returned from firmware
From: Neerav Parikh Store the CEE TLV status returned by firmware to allow drivers to dump that for debug purposes. Change-ID: Ie3c4cf8cebabee4f15e1e3fdc4fc8a68bbca40ee Signed-off-by: Neerav Parikh Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_dcb.c| 4 drivers/net/ethernet/intel/i40e/i40e_type.h | 1 + drivers/net/ethernet/intel/i40evf/i40e_type.h | 1 + 3 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 2547aa2..90de46a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -588,6 +588,8 @@ i40e_status i40e_get_dcb_config(struct i40e_hw *hw) if (!ret) { /* CEE mode */ hw->local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_CEE; + hw->local_dcbx_config.tlv_status = + le16_to_cpu(cee_v1_cfg.tlv_status); i40e_cee_to_dcb_v1_config(&cee_v1_cfg, &hw->local_dcbx_config); } @@ -597,6 +599,8 @@ i40e_status i40e_get_dcb_config(struct i40e_hw *hw) if (!ret) { /* CEE mode */ hw->local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_CEE; + hw->local_dcbx_config.tlv_status = + le32_to_cpu(cee_cfg.tlv_status); i40e_cee_to_dcb_config(&cee_cfg, &hw->local_dcbx_config); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index acb2aad..4842239 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -440,6 +440,7 @@ struct i40e_dcbx_config { #define I40E_DCBX_MODE_CEE 0x1 #define I40E_DCBX_MODE_IEEE0x2 u32 numapps; + u32 tlv_status; /* CEE mode TLV status */ struct i40e_dcb_ets_config etscfg; struct i40e_dcb_ets_config etsrec; struct i40e_dcb_pfc_config pfc; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 1ab2498..24a2693 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -434,6 
+434,7 @@ struct i40e_ieee_app_priority_table { struct i40e_dcbx_config { u32 numapps; + u32 tlv_status; /* CEE mode TLV status */ struct i40e_ieee_ets_config etscfg; struct i40e_ieee_ets_recommend etsrec; struct i40e_ieee_pfc_config pfc; -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[net-next v2 06/14] i40e/i40evf: Add capability to gather VEB per TC stats
From: Neerav Parikh This patch adds capability to update per VEB per TC statistics and dump it via ethtool. It also adds a structure to hold VEB per TC statistics. The fields can be filled by reading the GLVEBTC_* counters. Change-ID: I28b4759b9ab6ad5a61f046a1bc9ef6b16fe31538 Signed-off-by: Neerav Parikh Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 2 ++ drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 23 - drivers/net/ethernet/intel/i40e/i40e_main.c| 28 +- drivers/net/ethernet/intel/i40e/i40e_type.h| 8 drivers/net/ethernet/intel/i40evf/i40e_type.h | 8 5 files changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 0f97883..05df21c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -442,6 +442,8 @@ struct i40e_veb { bool stat_offsets_loaded; struct i40e_eth_stats stats; struct i40e_eth_stats stats_offsets; + struct i40e_veb_tc_stats tc_stats; + struct i40e_veb_tc_stats tc_stats_offsets; }; /* struct that defines a VSI, associated with a dev */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 230d127..74c16a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -197,7 +197,14 @@ static const struct i40e_stats i40e_gstrings_fcoe_stats[] = { FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_tx) + \ FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_2_xoff)) \ / sizeof(u64)) +#define I40E_VEB_TC_STATS_LEN ( \ + (FIELD_SIZEOF(struct i40e_veb, tc_stats.tc_rx_packets) + \ +FIELD_SIZEOF(struct i40e_veb, tc_stats.tc_rx_bytes) + \ +FIELD_SIZEOF(struct i40e_veb, tc_stats.tc_tx_packets) + \ +FIELD_SIZEOF(struct i40e_veb, tc_stats.tc_tx_bytes)) \ +/ sizeof(u64)) #define I40E_VEB_STATS_LEN ARRAY_SIZE(i40e_gstrings_veb_stats) +#define I40E_VEB_STATS_TOTAL (I40E_VEB_STATS_LEN 
+ I40E_VEB_TC_STATS_LEN) #define I40E_PF_STATS_LEN(n) (I40E_GLOBAL_STATS_LEN + \ I40E_PFC_STATS_LEN + \ I40E_VSI_STATS_LEN((n))) @@ -1257,7 +1264,7 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset) int len = I40E_PF_STATS_LEN(netdev); if (pf->lan_veb != I40E_NO_VEB) - len += I40E_VEB_STATS_LEN; + len += I40E_VEB_STATS_TOTAL; return len; } else { return I40E_VSI_STATS_LEN(netdev); @@ -1408,6 +1415,20 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, i40e_gstrings_veb_stats[i].stat_string); p += ETH_GSTRING_LEN; } + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + snprintf(p, ETH_GSTRING_LEN, +"veb.tc_%u_tx_packets", i); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, +"veb.tc_%u_tx_bytes", i); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, +"veb.tc_%u_rx_packets", i); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, +"veb.tc_%u_rx_bytes", i); + p += ETH_GSTRING_LEN; + } } for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) { snprintf(p, ETH_GSTRING_LEN, "port.%s", diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 481f427..c0bfb55 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -624,11 +624,15 @@ static void i40e_update_veb_stats(struct i40e_veb *veb) struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ - int idx = 0; + struct i40e_veb_tc_stats *veb_oes; + struct i40e_veb_tc_stats *veb_es; + int i, idx = 0; idx = veb->stats_idx; es = &veb->stats; oes = &veb->stats_offsets; + veb_es = &veb->tc_stats; + veb_oes = &veb->tc_stats_offsets; /* Gather up the stats that the hw collects */ i40e_stat_update32(hw, I40E_GLSW_TDP
[Patch net-next 2/5] net_sched: move TCQ_F_MQROOT into qdisc ops
It is just another static flag which can be moved. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang --- include/net/sch_generic.h | 2 +- net/sched/sch_api.c | 6 +++--- net/sched/sch_mq.c| 2 +- net/sched/sch_mqprio.c| 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index fe835e1..943736a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -50,7 +50,6 @@ struct Qdisc { unsigned intflags; #define TCQ_F_INGRESS 2 #define TCQ_F_CAN_BYPASS 4 -#define TCQ_F_MQROOT 8 #define TCQ_F_ONETXQUEUE 0x10 /* dequeue_skb() can assume all skbs are for * q->dev_queue : It can test * netif_xmit_frozen_or_stopped() before @@ -181,6 +180,7 @@ struct Qdisc_ops { charid[IFNAMSIZ]; int priv_size; #define QDISC_F_BUILTIN1 +#define QDISC_F_MQ 2 unsigned intflags; int (*enqueue)(struct sk_buff *, struct Qdisc *); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f2b194b..90a4cf9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -970,12 +970,12 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, spinlock_t *root_lock; err = -EOPNOTSUPP; - if (sch->flags & TCQ_F_MQROOT) + if (sch->ops->flags & QDISC_F_MQ) goto err_out4; if ((sch->parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS) && - (!p || !(p->flags & TCQ_F_MQROOT))) + (!p || !(p->ops->flags & QDISC_F_MQ))) root_lock = qdisc_root_sleeping_lock(sch); else root_lock = qdisc_lock(sch); @@ -1041,7 +1041,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator because change can't be undone. 
*/ - if (sch->flags & TCQ_F_MQROOT) + if (sch->ops->flags & QDISC_F_MQ) goto out; gen_replace_estimator(&sch->bstats, sch->cpu_bstats, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3cbaec..cab9fc2 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -66,7 +66,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) qdisc->flags |= TCQ_F_ONETXQUEUE; } - sch->flags |= TCQ_F_MQROOT; return 0; err: @@ -237,6 +236,7 @@ static const struct Qdisc_class_ops mq_class_ops = { struct Qdisc_ops mq_qdisc_ops __read_mostly = { .cl_ops = &mq_class_ops, .id = "mq", + .flags = QDISC_F_MQ, .priv_size = sizeof(struct mq_sched), .init = mq_init, .destroy= mq_destroy, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 3811a74..dc208c2 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -155,7 +155,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < TC_BITMASK + 1; i++) netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); - sch->flags |= TCQ_F_MQROOT; return 0; err: @@ -404,6 +403,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = { static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .cl_ops = &mqprio_class_ops, .id = "mqprio", + .flags = QDISC_F_MQ, .priv_size = sizeof(struct mqprio_sched), .init = mqprio_init, .destroy= mqprio_destroy, -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Patch net-next 1/5] net_sched: move some qdisc flag into qdisc ops
For those static flags, that is never changed dynamically, we could just move them into qdisc->ops. This will be used by the following patches. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang --- include/net/sch_generic.h | 3 ++- net/sched/sch_api.c | 4 ++-- net/sched/sch_generic.c | 10 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2eab08c..fe835e1 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -48,7 +48,6 @@ struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); struct sk_buff *(*dequeue)(struct Qdisc *dev); unsigned intflags; -#define TCQ_F_BUILTIN 1 #define TCQ_F_INGRESS 2 #define TCQ_F_CAN_BYPASS 4 #define TCQ_F_MQROOT 8 @@ -181,6 +180,8 @@ struct Qdisc_ops { const struct Qdisc_class_ops*cl_ops; charid[IFNAMSIZ]; int priv_size; +#define QDISC_F_BUILTIN1 + unsigned intflags; int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff *(*dequeue)(struct Qdisc *); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f06aa01..f2b194b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -260,7 +260,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; - if (!(root->flags & TCQ_F_BUILTIN) && + if (!(root->ops->flags & QDISC_F_BUILTIN) && root->handle == handle) return root; @@ -1384,7 +1384,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, static bool tc_qdisc_dump_ignore(struct Qdisc *q) { - return (q->flags & TCQ_F_BUILTIN) ? true : false; + return (q->ops->flags & QDISC_F_BUILTIN) ? 
true : false; } static int qdisc_notify(struct net *net, struct sk_buff *oskb, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 942fea8..460388a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -392,6 +392,7 @@ static struct sk_buff *noop_dequeue(struct Qdisc *qdisc) struct Qdisc_ops noop_qdisc_ops __read_mostly = { .id = "noop", + .flags = QDISC_F_BUILTIN, .priv_size = 0, .enqueue= noop_enqueue, .dequeue= noop_dequeue, @@ -407,7 +408,6 @@ static struct netdev_queue noop_netdev_queue = { struct Qdisc noop_qdisc = { .enqueue= noop_enqueue, .dequeue= noop_dequeue, - .flags = TCQ_F_BUILTIN, .ops= &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), @@ -418,6 +418,7 @@ EXPORT_SYMBOL(noop_qdisc); static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .id = "noqueue", + .flags = QDISC_F_BUILTIN, .priv_size = 0, .enqueue= noop_enqueue, .dequeue= noop_dequeue, @@ -434,7 +435,6 @@ static struct netdev_queue noqueue_netdev_queue = { static struct Qdisc noqueue_qdisc = { .enqueue= NULL, .dequeue= noop_dequeue, - .flags = TCQ_F_BUILTIN, .ops= &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), @@ -676,7 +676,7 @@ void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; - if (qdisc->flags & TCQ_F_BUILTIN || + if (ops->flags & QDISC_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; @@ -777,7 +777,7 @@ static void transition_one_qdisc(struct net_device *dev, struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; int *need_watchdog_p = _need_watchdog; - if (!(new_qdisc->flags & TCQ_F_BUILTIN)) + if (!(new_qdisc->ops->flags & QDISC_F_BUILTIN)) clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state); rcu_assign_pointer(dev_queue->qdisc, new_qdisc); @@ -826,7 +826,7 @@ static void dev_deactivate_queue(struct net_device *dev, if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); - if 
(!(qdisc->flags & TCQ_F_BUILTIN)) + if (!(qdisc->ops->flags & QDISC_F_BUILTIN)) set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); rcu_assign_pointer(dev_queue->qdisc, qdisc_default); -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majo
[Patch net-next 0/5] net_sched: introduce static flags for qdisc's
The main goal of this patchset is to improve the behavior of setting the default qdisc. The current behavior has no error check and no check for ingress, and it _can_ crash the kernel with some buggy implementations. Currently we only have per-instance flags for qdiscs; static flags, such as whether a qdisc is a fifo qdisc, can simply be moved into qdisc->ops, as shown by patches 1, 2 and 5. Patch 4 just uses this for error checking when setting the default qdisc. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang --- Cong Wang (5): net_sched: move some qdisc flag into qdisc ops net_sched: move TCQ_F_MQROOT into qdisc ops net_sched: use a flag to indicate fifo qdiscs instead of the name net_sched: forbid setting default qdisc to inappropriate ones net_sched: move ingress flag into qdisc ops include/net/sch_generic.h | 9 ++--- net/sched/sch_api.c | 40 +++- net/sched/sch_fifo.c | 6 -- net/sched/sch_fq.c| 1 + net/sched/sch_fq_codel.c | 1 + net/sched/sch_generic.c | 11 ++- net/sched/sch_ingress.c | 1 + net/sched/sch_mq.c| 2 +- net/sched/sch_mqprio.c| 2 +- net/sched/sch_sfq.c | 1 + 10 files changed, 49 insertions(+), 25 deletions(-) -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Patch net-next 5/5] net_sched: move ingress flag into qdisc ops
Cc: Jamal Hadi Salim Cc: Stephen Hemminger Signed-off-by: Cong Wang --- include/net/sch_generic.h | 2 +- net/sched/sch_api.c | 11 +-- net/sched/sch_ingress.c | 1 + 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2e6748e..a02bf04 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -48,7 +48,6 @@ struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); struct sk_buff *(*dequeue)(struct Qdisc *dev); unsigned intflags; -#define TCQ_F_INGRESS 2 #define TCQ_F_CAN_BYPASS 4 #define TCQ_F_ONETXQUEUE 0x10 /* dequeue_skb() can assume all skbs are for * q->dev_queue : It can test @@ -183,6 +182,7 @@ struct Qdisc_ops { #define QDISC_F_MQ 2 #define QDISC_F_FIFO 4 #define QDISC_F_DEFAULTABLE8 +#define QDISC_F_INGRESS16 unsigned intflags; int (*enqueue)(struct sk_buff *, struct Qdisc *); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e501e9d..7f285fa8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -281,7 +281,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_list_add(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + if ((q->parent != TC_H_ROOT) && !(q->ops->flags & QDISC_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); @@ -292,7 +292,7 @@ EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) + if ((q->parent != TC_H_ROOT) && !(q->ops->flags & QDISC_F_INGRESS)) list_del(&q->list); } EXPORT_SYMBOL(qdisc_list_del); @@ -812,8 +812,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, ingress = 0; num_q = dev->num_tx_queues; - if ((q && q->flags & TCQ_F_INGRESS) || - (new && new->flags & TCQ_F_INGRESS)) { + if ((q && q->ops->flags & QDISC_F_INGRESS) || + (new && new->ops->flags & QDISC_F_INGRESS)) { num_q = 1; ingress = 1; if 
(!dev_ingress_queue(dev)) @@ -937,7 +937,6 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, sch->parent = parent; if (handle == TC_H_INGRESS) { - sch->flags |= TCQ_F_INGRESS; handle = TC_H_MAKE(TC_H_INGRESS, 0); lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock); } else { @@ -982,7 +981,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out4; if ((sch->parent != TC_H_ROOT) && - !(sch->flags & TCQ_F_INGRESS) && + !(sch->ops->flags & QDISC_F_INGRESS) && (!p || !(p->ops->flags & QDISC_F_MQ))) root_lock = qdisc_root_sleeping_lock(sch); else diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index e7c648f..2e30b39 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -92,6 +92,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", + .flags = QDISC_F_INGRESS, .init = ingress_init, .destroy= ingress_destroy, .dump = ingress_dump, -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Patch net-next 4/5] net_sched: forbid setting default qdisc to inappropriate ones
Currently there is no check for whether a qdisc is appropriate to be used as the default qdisc. As a result, we get no error even if we set the default qdisc to an inappropriate one; an error only shows up later. This is not good. Also, for qdiscs like HTB, the kernel will just crash when we use one as the default qdisc: some data structures are not yet initialized when opt == NULL is checked, so the cleanup calling ->reset() or ->destroy() on them will just crash. Let's fail as early as we can. Cc: Jamal Hadi Salim Cc: Stephen Hemminger Signed-off-by: Cong Wang --- include/net/sch_generic.h | 1 + net/sched/sch_api.c | 12 ++-- net/sched/sch_fifo.c | 6 +++--- net/sched/sch_fq.c| 1 + net/sched/sch_fq_codel.c | 1 + net/sched/sch_generic.c | 2 +- net/sched/sch_mq.c| 2 +- net/sched/sch_sfq.c | 1 + 8 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f7ad38a..2e6748e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -182,6 +182,7 @@ struct Qdisc_ops { #define QDISC_F_BUILTIN1 #define QDISC_F_MQ 2 #define QDISC_F_FIFO 4 +#define QDISC_F_DEFAULTABLE8 unsigned intflags; int (*enqueue)(struct sk_buff *, struct Qdisc *); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 90a4cf9..e501e9d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -227,6 +227,7 @@ static struct Qdisc_ops *qdisc_lookup_default(const char *name) int qdisc_set_default(const char *name) { const struct Qdisc_ops *ops; + int err = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -243,13 +244,20 @@ int qdisc_set_default(const char *name) } if (ops) { + if (!(ops->flags & QDISC_F_DEFAULTABLE)) { + err = -EINVAL; + goto unlock; + } /* Set new default */ module_put(default_qdisc_ops->owner); default_qdisc_ops = ops; + } else { + err = -ENOENT; } - write_unlock(&qdisc_mod_lock); - return ops ? 0 : -ENOENT; +unlock: + write_unlock(&qdisc_mod_lock); + return err; } /* We know handle.
Find qdisc among all qdisc's attached to device diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index e51d786..83947f6 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -96,7 +96,7 @@ static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .id = "pfifo", .priv_size = 0, - .flags = QDISC_F_FIFO, + .flags = QDISC_F_FIFO | QDISC_F_DEFAULTABLE, .enqueue= pfifo_enqueue, .dequeue= qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -112,7 +112,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops); struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .id = "bfifo", .priv_size = 0, - .flags = QDISC_F_FIFO, + .flags = QDISC_F_FIFO | QDISC_F_DEFAULTABLE, .enqueue= bfifo_enqueue, .dequeue= qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -128,7 +128,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops); struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .id = "pfifo_head_drop", .priv_size = 0, - .flags = QDISC_F_FIFO, + .flags = QDISC_F_FIFO | QDISC_F_DEFAULTABLE, .enqueue= pfifo_tail_enqueue, .dequeue= qdisc_dequeue_head, .peek = qdisc_peek_head, diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f377702..e543b41 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -831,6 +831,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct Qdisc_ops fq_qdisc_ops __read_mostly = { .id = "fq", + .flags = QDISC_F_DEFAULTABLE, .priv_size = sizeof(struct fq_sched_data), .enqueue= fq_enqueue, diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a9ba030..f8f5e82 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -625,6 +625,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = { static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { .cl_ops = &fq_codel_class_ops, .id = "fq_codel", + .flags = QDISC_F_DEFAULTABLE, .priv_size = sizeof(struct fq_codel_sched_data), .enqueue= fq_codel_enqueue, .d
[Patch net-next 3/5] net_sched: use a flag to indicate fifo qdiscs instead of the name
Relying on its name is a bad practice. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang --- include/net/sch_generic.h | 1 + net/sched/sch_fifo.c | 6 -- net/sched/sch_generic.c | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 943736a..f7ad38a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -181,6 +181,7 @@ struct Qdisc_ops { int priv_size; #define QDISC_F_BUILTIN1 #define QDISC_F_MQ 2 +#define QDISC_F_FIFO 4 unsigned intflags; int (*enqueue)(struct sk_buff *, struct Qdisc *); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 2177eac..e51d786 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -96,6 +96,7 @@ static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .id = "pfifo", .priv_size = 0, + .flags = QDISC_F_FIFO, .enqueue= pfifo_enqueue, .dequeue= qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -111,6 +112,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops); struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .id = "bfifo", .priv_size = 0, + .flags = QDISC_F_FIFO, .enqueue= bfifo_enqueue, .dequeue= qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -126,6 +128,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops); struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .id = "pfifo_head_drop", .priv_size = 0, + .flags = QDISC_F_FIFO, .enqueue= pfifo_tail_enqueue, .dequeue= qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -143,8 +146,7 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit) struct nlattr *nla; int ret = -ENOMEM; - /* Hack to avoid sending change message to non-FIFO */ - if (strncmp(q->ops->id + 1, "fifo", 4) != 0) + if (!(q->ops->flags & QDISC_F_FIFO)) return 0; nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 460388a..70b7713 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -567,6 
+567,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct Qdisc_ops pfifo_fast_ops __read_mostly = { .id = "pfifo_fast", .priv_size = sizeof(struct pfifo_fast_priv), + .flags = QDISC_F_FIFO, .enqueue= pfifo_fast_enqueue, .dequeue= pfifo_fast_dequeue, .peek = pfifo_fast_peek, -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] route: fix breakage after moving lwtunnel state
On 08/26/15 at 06:19pm, Jiri Benc wrote: > might be a noise. However, there's definitely room for performance > improvement here, the lwtunnel vxlan throughput is at about ~40% of the > non-vxlan throughput. I did not spend too much time on analyzing this, yet, > but it's clear the dst_entry layout is not our biggest concern here. I'm currently working on reducing the overhead for VXLAN and GRE and effectively Geneve once Pravin's work is in. The main disadvantage of lwt based flow tunneling is the additional fib_lookup() performed for each packet. It seems tempting to cache the tunnel endpoint dst in the lwt state of the overlay route. It will usually point to the same dst for every packet. The cache behaviour is dependent on there being no fib rules and on the route being a single-nexthop route. Did you test with a card that features UDP encapsulation offloads? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net-next] net: sched: consolidate tc_classify{,_compat}
On 08/26/2015 11:54 PM, Alexei Starovoitov wrote: On 8/26/15 2:00 PM, Daniel Borkmann wrote: ... +reset: +if (unlikely(limit++ >= MAX_REC_LOOP)) { +net_notice_ratelimited("%s: reclassify loop, rule prio %u, " + "protocol %02x\n", tp->q->ops->id, + tp->prio & 0xffff, ntohs(tp->protocol)); why drop 'packet' and add two extra ',' in the message ? Not a big deal, just why bother? No deep underlying reason, thought it would make it slightly more readable. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 net-next 1/8] geneve: Initialize ethernet address in device setup.
Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Acked-by: John W. Linville --- drivers/net/geneve.c |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 897e1a3..95e9da0 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -297,6 +297,7 @@ static void geneve_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + eth_hw_addr_random(dev); } static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { @@ -364,9 +365,6 @@ static int geneve_newlink(struct net *net, struct net_device *dev, return -EBUSY; } - if (tb[IFLA_ADDRESS] == NULL) - eth_hw_addr_random(dev); - err = register_netdevice(dev); if (err) return err; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 net-next 4/8] geneve: Make dst-port configurable.
Add netlink interface to configure Geneve UDP port number. So that user can configure it for a Gevene device. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Acked-by: John W. Linville --- drivers/net/geneve.c | 25 + include/uapi/linux/if_link.h |1 + 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 3c5b2b1..0a6d974 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -49,6 +49,7 @@ struct geneve_dev { u8 tos; /* TOS override */ struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next;/* geneve's per namespace list */ + __be16 dst_port; }; static int geneve_net_id; @@ -64,6 +65,7 @@ static inline __u32 geneve_net_vni_hash(u8 vni[3]) /* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { + struct inet_sock *sk = inet_sk(gs->sock->sk); struct genevehdr *gnvh = geneve_hdr(skb); struct geneve_dev *dummy, *geneve = NULL; struct geneve_net *gn; @@ -82,7 +84,8 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) vni_list_head = &gn->vni_list[hash]; hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && - iph->saddr == dummy->remote.sin_addr.s_addr) { + iph->saddr == dummy->remote.sin_addr.s_addr && + sk->inet_sport == dummy->dst_port) { geneve = dummy; break; } @@ -157,7 +160,7 @@ static int geneve_open(struct net_device *dev) struct geneve_net *gn = net_generic(geneve->net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn, + gs = geneve_sock_add(net, geneve->dst_port, geneve_rx, gn, false, false); if (IS_ERR(gs)) return PTR_ERR(gs); @@ -228,7 +231,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) /* no need to handle local destination and encap bypass...yet... 
*/ err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr, - tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0, + tos, ttl, 0, sport, geneve->dst_port, 0, geneve->vni, 0, NULL, false, !net_eq(geneve->net, dev_net(geneve->dev))); if (err < 0) @@ -308,6 +311,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_REMOTE]= { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, + [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -341,6 +345,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, struct hlist_head *vni_list_head; struct sockaddr_in remote; /* IPv4 address for link partner */ __u32 vni, hash; + __be16 dst_port; int err; if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) @@ -359,13 +364,20 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr))) return -EINVAL; + if (data[IFLA_GENEVE_PORT]) + dst_port = htons(nla_get_u16(data[IFLA_GENEVE_PORT])); + else + dst_port = htons(GENEVE_UDP_PORT); + remote = geneve->remote; hash = geneve_net_vni_hash(geneve->vni); vni_list_head = &gn->vni_list[hash]; hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) && - !memcmp(&remote, &dummy->remote, sizeof(dummy->remote))) + !memcmp(&remote, &dummy->remote, sizeof(dummy->remote)) && + dst_port == dummy->dst_port) { return -EBUSY; + } } err = register_netdevice(dev); @@ -378,6 +390,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (data[IFLA_GENEVE_TOS]) geneve->tos = nla_get_u8(data[IFLA_GENEVE_TOS]); + geneve->dst_port = dst_port; list_add(&geneve->next, &gn->geneve_list); hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); @@ -402,6 +415,7 @@ static size_t geneve_get_size(const struct net_device *dev) 
nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ nla_total_size(sizeof(__u8)) + /* IFLA_
[PATCH v4 net-next 5/8] geneve: Add support to collect tunnel metadata.
Following patch create new tunnel flag which enable tunnel metadata collection on given device. These devices can be used by tunnel metadata based routing or by OVS. Geneve Consolidation patch get rid of collect_md_tun to simplify tunnel lookup further. Signed-off-by: Pravin B Shelar --- v3-v4: - Do not set NETIF_F_NETNS_LOCAL v2-v3: - Do not allow regular and metadata tunnel devices on same port. --- drivers/net/geneve.c | 356 -- include/net/geneve.h |3 + include/uapi/linux/if_link.h |1 + 3 files changed, 275 insertions(+), 85 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0a6d974..d05150c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); struct geneve_net { struct list_head geneve_list; struct hlist_head vni_list[VNI_HASH_SIZE]; + struct geneve_dev __rcu *collect_md_tun; }; /* Pseudo network device */ @@ -50,6 +52,7 @@ struct geneve_dev { struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next;/* geneve's per namespace list */ __be16 dst_port; + bool collect_md; }; static int geneve_net_id; @@ -62,48 +65,95 @@ static inline __u32 geneve_net_vni_hash(u8 vni[3]) return hash_32(vnid, VNI_HASH_BITS); } -/* geneve receive/decap routine */ -static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) +static __be64 vni_to_tunnel_id(const __u8 *vni) +{ +#ifdef __BIG_ENDIAN + return (vni[0] << 16) | (vni[1] << 8) | vni[2]; +#else + return (__force __be64)(((__force u64)vni[0] << 40) | + ((__force u64)vni[1] << 48) | + ((__force u64)vni[2] << 56)); +#endif +} + +static struct geneve_dev *geneve_lookup(struct geneve_net *gn, + struct geneve_sock *gs, + struct iphdr *iph, + struct genevehdr *gnvh) { struct inet_sock *sk = inet_sk(gs->sock->sk); - struct genevehdr *gnvh = geneve_hdr(skb); - struct geneve_dev *dummy, *geneve = NULL; - 
struct geneve_net *gn; - struct iphdr *iph = NULL; - struct pcpu_sw_netstats *stats; struct hlist_head *vni_list_head; - int err = 0; + struct geneve_dev *geneve; __u32 hash; - iph = ip_hdr(skb); /* Still outer IP header... */ - - gn = gs->rcv_data; + geneve = rcu_dereference(gn->collect_md_tun); + if (geneve) + return geneve; /* Find the device for this VNI */ hash = geneve_net_vni_hash(gnvh->vni); vni_list_head = &gn->vni_list[hash]; - hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { - if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && - iph->saddr == dummy->remote.sin_addr.s_addr && - sk->inet_sport == dummy->dst_port) { - geneve = dummy; - break; + hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { + if (!memcmp(gnvh->vni, geneve->vni, sizeof(geneve->vni)) && + iph->saddr == geneve->remote.sin_addr.s_addr && + sk->inet_sport == geneve->dst_port) { + return geneve; } } + return NULL; +} + +/* geneve receive/decap routine */ +static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) +{ + struct genevehdr *gnvh = geneve_hdr(skb); + struct metadata_dst *tun_dst = NULL; + struct geneve_dev *geneve = NULL; + struct pcpu_sw_netstats *stats; + struct geneve_net *gn; + struct iphdr *iph; + int err; + + iph = ip_hdr(skb); /* Still outer IP header... */ + gn = gs->rcv_data; + geneve = geneve_lookup(gn, gs, iph, gnvh); if (!geneve) goto drop; - /* Drop packets w/ critical options, -* since we don't support any... -*/ - if (gnvh->critical) - goto drop; + if (ip_tunnel_collect_metadata() || geneve->collect_md) { + __be16 flags; + void *opts; + + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | + (gnvh->oam ? TUNNEL_OAM : 0) | + (gnvh->critical ? TUNNEL_CRIT_OPT : 0); + + tun_dst = udp_tun_rx_dst(skb, AF_INET, flags, +vni_to_tunnel_id(gnvh->vni), +gnvh->opt_len * 4); + if (!tun_dst) + goto drop; + + /* Update tunnel dst according to Geneve options. */ + op
[PATCH v4 net-next 2/8] geneve: Use skb mark and protocol to lookup route.
On the packet transmit path, geneve needs to look up a route. The following patch improves the route lookup by also passing the skb mark and the UDP protocol in the flow key. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf Acked-by: John W. Linville --- drivers/net/geneve.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 95e9da0..3c5b2b1 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -202,6 +202,9 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_tos = RT_TOS(tos); fl4.daddr = geneve->remote.sin_addr.s_addr; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + rt = ip_route_output_key(geneve->net, &fl4); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 net-next 6/8] openvswitch: Use Geneve device.
With help of tunnel metadata mode OVS can directly use Geneve devices to implement Geneve tunnels. This patch removes all of the OVS specific Geneve code and make OVS use a Geneve net_device. Basic geneve vport is still there to handle compatibility with current userspace application. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross --- net/openvswitch/Kconfig|2 +- net/openvswitch/vport-geneve.c | 179 +++ 2 files changed, 33 insertions(+), 148 deletions(-) diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 422dc05..87b98c0 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -59,7 +59,7 @@ config OPENVSWITCH_VXLAN config OPENVSWITCH_GENEVE tristate "Open vSwitch Geneve tunneling support" depends on OPENVSWITCH - depends on GENEVE_CORE + depends on GENEVE default OPENVSWITCH ---help--- If you say Y here, then the Open vSwitch will be able create geneve vport. diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index d01bd63..fa37c95 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -26,95 +26,44 @@ #include "datapath.h" #include "vport.h" +#include "vport-netdev.h" static struct vport_ops ovs_geneve_vport_ops; - /** * struct geneve_port - Keeps track of open UDP ports - * @gs: The socket created for this port number. - * @name: vport name. + * @dst_port: destination port. */ struct geneve_port { - struct geneve_sock *gs; - char name[IFNAMSIZ]; + u16 port_no; }; -static LIST_HEAD(geneve_ports); - static inline struct geneve_port *geneve_vport(const struct vport *vport) { return vport_priv(vport); } -/* Convert 64 bit tunnel ID to 24 bit VNI. 
*/ -static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) -{ -#ifdef __BIG_ENDIAN - vni[0] = (__force __u8)(tun_id >> 16); - vni[1] = (__force __u8)(tun_id >> 8); - vni[2] = (__force __u8)tun_id; -#else - vni[0] = (__force __u8)((__force u64)tun_id >> 40); - vni[1] = (__force __u8)((__force u64)tun_id >> 48); - vni[2] = (__force __u8)((__force u64)tun_id >> 56); -#endif -} - -/* Convert 24 bit VNI to 64 bit tunnel ID. */ -static __be64 vni_to_tunnel_id(const __u8 *vni) -{ -#ifdef __BIG_ENDIAN - return (vni[0] << 16) | (vni[1] << 8) | vni[2]; -#else - return (__force __be64)(((__force u64)vni[0] << 40) | - ((__force u64)vni[1] << 48) | - ((__force u64)vni[2] << 56)); -#endif -} - -static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) -{ - struct vport *vport = gs->rcv_data; - struct genevehdr *geneveh = geneve_hdr(skb); - int opts_len; - struct ip_tunnel_info tun_info; - __be64 key; - __be16 flags; - - opts_len = geneveh->opt_len * 4; - - flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | - (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | - (geneveh->oam ? TUNNEL_OAM : 0) | - (geneveh->critical ? 
TUNNEL_CRIT_OPT : 0); - - key = vni_to_tunnel_id(geneveh->vni); - - ip_tunnel_info_init(&tun_info, ip_hdr(skb), - udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, flags, geneveh->options, opts_len); - - ovs_vport_receive(vport, skb, &tun_info); -} - static int geneve_get_options(const struct vport *vport, struct sk_buff *skb) { struct geneve_port *geneve_port = geneve_vport(vport); - struct inet_sock *sk = inet_sk(geneve_port->gs->sock->sk); - if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport))) + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no)) return -EMSGSIZE; return 0; } -static void geneve_tnl_destroy(struct vport *vport) +static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb, + struct ip_tunnel_info *egress_tun_info) { struct geneve_port *geneve_port = geneve_vport(vport); + struct net *net = ovs_dp_get_net(vport->dp); + __be16 dport = htons(geneve_port->port_no); + __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true); - geneve_sock_release(geneve_port->gs); - - ovs_vport_deferred_free(vport); + return ovs_tunnel_get_egress_info(egress_tun_info, + ovs_dp_get_net(vport->dp), + OVS_CB(skb)->egress_tun_info, + IPPROTO_UDP, skb->mark, sport, dport); } static struct vport *geneve_tnl_create(const struct vport_parms *parms) @@ -122,11 +71,11 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) struct net *net = ovs_dp_get_net(parms->dp); struct nlattr *options
[PATCH v4 net-next 7/8] geneve: Consolidate Geneve functionality in single module.
The geneve_core module handles send and receive functionality so that OVS could use the Geneve API. Now, with the use of tunnel metadata mode, OVS can directly use the Geneve netdevice, so there is no need for a separate module for Geneve. The following patch consolidates Geneve protocol processing in a single module. Signed-off-by: Pravin B Shelar --- v3-v4: - Fixed newlink() validation. v2-v3: - Fixed Kconfig dependency. - unified geneve_build_skb() - Fixed geneve_build_skb() error path. --- drivers/net/Kconfig|4 +- drivers/net/geneve.c | 512 +++- include/net/geneve.h | 34 net/ipv4/Kconfig | 14 -- net/ipv4/Makefile |1 - net/ipv4/geneve_core.c | 447 -- 6 files changed, 425 insertions(+), 587 deletions(-) delete mode 100644 net/ipv4/geneve_core.c diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 770483b..d18eb60 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -180,8 +180,8 @@ config VXLAN will be called vxlan. config GENEVE - tristate "Generic Network Virtualization Encapsulation netdev" - depends on INET && GENEVE_CORE + tristate "Generic Network Virtualization Encapsulation" + depends on INET && NET_UDP_TUNNEL select NET_IP_TUNNEL ---help--- This allows one to create geneve virtual interfaces that provide diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d05150c..a36a1de 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -18,6 +18,7 @@ #include #include #include +#include #define GENEVE_NETDEV_VER "0.6" @@ -33,13 +34,18 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); +#define GENEVE_VER 0 +#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) + /* per-network namespace private data for this module */ struct geneve_net { - struct list_head geneve_list; - struct hlist_head vni_list[VNI_HASH_SIZE]; - struct geneve_dev __rcu *collect_md_tun; + struct list_headgeneve_list; + struct hlist_head 
vni_list[VNI_HASH_SIZE]; + struct list_headsock_list; }; +static int geneve_net_id; + /* Pseudo network device */ struct geneve_dev { struct hlist_node hlist; /* vni hash table */ @@ -55,7 +61,15 @@ struct geneve_dev { bool collect_md; }; -static int geneve_net_id; +struct geneve_sock { + boolcollect_md; + struct geneve_net *gn; + struct list_headlist; + struct socket *sock; + struct rcu_head rcu; + int refcnt; + struct udp_offload udp_offloads; +}; static inline __u32 geneve_net_vni_hash(u8 vni[3]) { @@ -76,51 +90,63 @@ static __be64 vni_to_tunnel_id(const __u8 *vni) #endif } -static struct geneve_dev *geneve_lookup(struct geneve_net *gn, - struct geneve_sock *gs, - struct iphdr *iph, - struct genevehdr *gnvh) +static struct geneve_dev *geneve_lookup(struct geneve_net *gn, __be16 port, + __be32 addr, u8 vni[]) { - struct inet_sock *sk = inet_sk(gs->sock->sk); struct hlist_head *vni_list_head; struct geneve_dev *geneve; __u32 hash; - geneve = rcu_dereference(gn->collect_md_tun); - if (geneve) - return geneve; - /* Find the device for this VNI */ - hash = geneve_net_vni_hash(gnvh->vni); + hash = geneve_net_vni_hash(vni); vni_list_head = &gn->vni_list[hash]; hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { - if (!memcmp(gnvh->vni, geneve->vni, sizeof(geneve->vni)) && - iph->saddr == geneve->remote.sin_addr.s_addr && - sk->inet_sport == geneve->dst_port) { + if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && + addr == geneve->remote.sin_addr.s_addr && + port == geneve->dst_port) { return geneve; } } return NULL; } +static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) +{ + return (struct genevehdr *)(udp_hdr(skb) + 1); +} + /* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { + struct inet_sock *sk = inet_sk(gs->sock->sk); struct genevehdr *gnvh = geneve_hdr(skb); + struct geneve_net *gn = gs->gn; struct metadata_dst *tun_dst = NULL; struct geneve_dev *geneve = NULL; struct pcpu_sw_netstats 
*stats; - struct geneve_net *gn; struct iphdr *iph; + u8 *vni; + __be32 addr; +
[PATCH v4 net-next 0/8] Geneve: Add support for tunnel metadata mode
The following patches add support for Geneve tunnel metadata mode. OVS can make use of the Geneve net-device with the tunnel metadata API from the kernel. This also allows us to consolidate the Geneve implementation from two kernel modules, geneve_core and geneve, into a single geneve module. The geneve_core module was intended to share Geneve encap and decap code between the Geneve netdevice and the OVS Geneve tunnel implementation. Since OVS no longer needs these APIs, the Geneve code can be consolidated into a single geneve module. v3-v4: - Drop NETIF_F_NETNS_LOCAL feature. - Fix geneve device newlink check v2-v3: - make tunnel metadata device and regular device mutually exclusive. - Fix Kconfig dependency for Geneve. - Fix dst-port netlink encoding. - drop changelink patch. v1-v2: - Replaced per hash table tunnel pointer (metadata enabled) with flag. - Added support for changelink. - Improve geneve device route lookup with more parameters. Pravin B Shelar (8): geneve: Initialize ethernet address in device setup. geneve: Use skb mark and protocol to lookup route. tunnel: introduce udp_tun_rx_dst() geneve: Make dst-port configurable. geneve: Add support to collect tunnel metadata. openvswitch: Use Geneve device. geneve: Consolidate Geneve functionality in single module. geneve: Move device hash table to geneve socket. drivers/net/Kconfig|4 +- drivers/net/geneve.c | 741 ++-- drivers/net/vxlan.c| 29 +-- include/net/dst_metadata.h | 61 include/net/geneve.h | 35 +-- include/net/udp_tunnel.h |4 + include/uapi/linux/if_link.h |2 + net/ipv4/Kconfig | 14 - net/ipv4/Makefile |1 - net/ipv4/geneve_core.c | 447 net/ipv4/ip_gre.c | 21 +- net/ipv4/udp_tunnel.c | 25 ++- net/openvswitch/Kconfig|2 +- net/openvswitch/vport-geneve.c | 179 ++ 14 files changed, 771 insertions(+), 794 deletions(-) delete mode 100644 net/ipv4/geneve_core.c -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 net-next 3/8] tunnel: introduce udp_tun_rx_dst()
Introduce function udp_tun_rx_dst() to initialize tunnel dst on receive path. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Acked-by: Thomas Graf --- drivers/net/vxlan.c| 29 ++-- include/net/dst_metadata.h | 61 include/net/udp_tunnel.h |4 +++ net/ipv4/ip_gre.c | 21 +++--- net/ipv4/udp_tunnel.c | 25 +- 5 files changed, 97 insertions(+), 43 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 61b457b..5b4cf66 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1264,36 +1264,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } if (vxlan_collect_metadata(vs)) { - tun_dst = metadata_dst_alloc(sizeof(*md), GFP_ATOMIC); + tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, +cpu_to_be64(vni >> 8), sizeof(*md)); + if (!tun_dst) goto drop; info = &tun_dst->u.tun_info; - if (vxlan_get_sk_family(vs) == AF_INET) { - const struct iphdr *iph = ip_hdr(skb); - - info->key.u.ipv4.src = iph->saddr; - info->key.u.ipv4.dst = iph->daddr; - info->key.tos = iph->tos; - info->key.ttl = iph->ttl; - } else { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - - info->key.u.ipv6.src = ip6h->saddr; - info->key.u.ipv6.dst = ip6h->daddr; - info->key.tos = ipv6_get_dsfield(ip6h); - info->key.ttl = ip6h->hop_limit; - } - - info->key.tp_src = udp_hdr(skb)->source; - info->key.tp_dst = udp_hdr(skb)->dest; - - info->mode = IP_TUNNEL_INFO_RX; - info->key.tun_flags = TUNNEL_KEY; - info->key.tun_id = cpu_to_be64(vni >> 8); - if (udp_hdr(skb)->check != 0) - info->key.tun_flags |= TUNNEL_CSUM; - md = ip_tunnel_info_opts(info, sizeof(*md)); } else { memset(md, 0, sizeof(*md)); diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 2cb52d5..60c0332 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -48,4 +48,65 @@ static inline bool skb_valid_dst(const struct sk_buff *skb) struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); struct metadata_dst __percpu 
*metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); +static inline struct metadata_dst *tun_rx_dst(__be16 flags, + __be64 tunnel_id, int md_size) +{ + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = metadata_dst_alloc(md_size, GFP_ATOMIC); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->mode = IP_TUNNEL_INFO_RX; + info->key.tun_flags = flags; + info->key.tun_id = tunnel_id; + info->key.tp_src = 0; + info->key.tp_dst = 0; + return tun_dst; +} + +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, +__be16 flags, +__be64 tunnel_id, +int md_size) +{ + const struct iphdr *iph = ip_hdr(skb); + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.u.ipv4.src = iph->saddr; + info->key.u.ipv4.dst = iph->daddr; + info->key.tos = iph->tos; + info->key.ttl = iph->ttl; + return tun_dst; +} + +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, +__be16 flags, +__be64 tunnel_id, +int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + tun_dst = tun_rx_dst(flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.u.ipv6.src = ip6h->saddr; + info->key.u.ipv6.dst = ip6h->daddr; + info->key.tos = ipv6_get_dsfield(ip6h); + info->key.ttl = ip6h->hop_limit; + return tun_dst; +} + #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index c491c12..35041d0 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -93,6 +93,10 @@ int udp_tunnel6_xmit_skb
[PATCH v4 net-next 8/8] geneve: Move device hash table to geneve socket.
This change simplifies Geneve Tunnel hash table management. Signed-off-by: Pravin B Shelar Reviewed-by: Jesse Gross Reviewed-by: John W. Linville --- Rebased on updated patches. --- drivers/net/geneve.c | 43 +-- 1 files changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index a36a1de..6bdf30b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -40,7 +40,6 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); /* per-network namespace private data for this module */ struct geneve_net { struct list_headgeneve_list; - struct hlist_head vni_list[VNI_HASH_SIZE]; struct list_headsock_list; }; @@ -63,12 +62,12 @@ struct geneve_dev { struct geneve_sock { boolcollect_md; - struct geneve_net *gn; struct list_headlist; struct socket *sock; struct rcu_head rcu; int refcnt; struct udp_offload udp_offloads; + struct hlist_head vni_list[VNI_HASH_SIZE]; }; static inline __u32 geneve_net_vni_hash(u8 vni[3]) @@ -90,7 +89,7 @@ static __be64 vni_to_tunnel_id(const __u8 *vni) #endif } -static struct geneve_dev *geneve_lookup(struct geneve_net *gn, __be16 port, +static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { struct hlist_head *vni_list_head; @@ -99,13 +98,11 @@ static struct geneve_dev *geneve_lookup(struct geneve_net *gn, __be16 port, /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); - vni_list_head = &gn->vni_list[hash]; + vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) { if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) && - addr == geneve->remote.sin_addr.s_addr && - port == geneve->dst_port) { + addr == geneve->remote.sin_addr.s_addr) return geneve; - } } return NULL; } @@ -118,9 +115,7 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) /* geneve receive/decap routine */ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) { - struct inet_sock *sk = 
inet_sk(gs->sock->sk); struct genevehdr *gnvh = geneve_hdr(skb); - struct geneve_net *gn = gs->gn; struct metadata_dst *tun_dst = NULL; struct geneve_dev *geneve = NULL; struct pcpu_sw_netstats *stats; @@ -130,8 +125,6 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) bool xnet; int err; - iph = ip_hdr(skb); /* Still outer IP header... */ - if (gs->collect_md) { static u8 zero_vni[3]; @@ -139,10 +132,11 @@ static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) addr = 0; } else { vni = gnvh->vni; + iph = ip_hdr(skb); /* Still outer IP header... */ addr = iph->saddr; } - geneve = geneve_lookup(gn, sk->inet_sport, addr, vni); + geneve = geneve_lookup(gs, addr, vni); if (!geneve) goto drop; @@ -413,6 +407,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; + int h; gs = kzalloc(sizeof(*gs), GFP_KERNEL); if (!gs) @@ -426,7 +421,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, gs->sock = sock; gs->refcnt = 1; - gs->gn = gn; + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&gs->vni_list[h]); /* Initialize the geneve udp offloads structure */ gs->udp_offloads.port = port; @@ -440,7 +436,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_rcv = geneve_udp_encap_recv; tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - list_add(&gs->list, &gn->sock_list); return gs; } @@ -485,6 +480,7 @@ static int geneve_open(struct net_device *dev) struct net *net = geneve->net; struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; + __u32 hash; gs = geneve_find_sock(gn, geneve->dst_port); if (gs) { @@ -499,14 +495,20 @@ static int geneve_open(struct net_device *dev) out: gs->collect_md = geneve->collect_md; geneve->sock = gs; + + hash = geneve_net_vni_hash(geneve->vni); + 
hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); return 0; } static int geneve_stop(struct net_device *de
Re: [PATCH net-next] net: sched: consolidate tc_classify{,_compat}
On 8/26/15 2:00 PM, Daniel Borkmann wrote: For classifiers getting invoked via tc_classify(), we always need an extra function call into tc_classify_compat(), as both are being exported as symbols and tc_classify() itself doesn't do much except handling of reclassifications when tp->classify() returned with TC_ACT_RECLASSIFY. CBQ and ATM are the only qdiscs that directly call into tc_classify_compat(), all others use tc_classify(). When tc actions are being configured out in the kernel, tc_classify() effectively does nothing besides delegating. We could spare this layer and consolidate both functions. pktgen on single CPU constantly pushing skbs directly into the netif_receive_skb() path with a dummy classifier on ingress qdisc attached, improves slightly from 22.3Mpps to 23.1Mpps. Nice improvement! Signed-off-by: Daniel Borkmann --- include/net/pkt_sched.h | 4 +--- net/core/dev.c | 2 +- net/sched/sch_api.c | 55 ++-- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_choke.c| 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_dsmark.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_sfb.c | 2 +- net/sched/sch_sfq.c | 2 +- probably 'static inline' helper with default compat_mode=false could have reduced the size of the diff, but I guess it's ok as it is. +#ifdef CONFIG_NET_CLS_ACT + if (unlikely(err == TC_ACT_RECLASSIFY && +!compat_mode)) why line break? even single line would be well below 80 char limit... 
- if (unlikely(limit++ >= MAX_REC_LOOP)) { - net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", - tp->q->ops->id, - tp->prio & 0x, - ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - goto reclassify; +reset: + if (unlikely(limit++ >= MAX_REC_LOOP)) { + net_notice_ratelimited("%s: reclassify loop, rule prio %u, " + "protocol %02x\n", tp->q->ops->id, + tp->prio & 0x, ntohs(tp->protocol)); why drop 'packet' and add two extra ',' in the message ? Not a big deal, just why bother? Also breaking strings is not advised, since it hurts grepping. Other than that. Acked-by: Alexei Starovoitov -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -next] smsc911x: Fix crash seen if neither ACPI nor OF is configured or used
* Guenter Roeck [150826 13:58]: > Hi Tony, > > On 08/26/2015 01:16 PM, Tony Lindgren wrote: > [ ... ] > > >>We may need two separate patches, one to fix up device_property_read_u32() > >>to return -ENXIO, and one to fix smsc911x_probe_config() to ignore the error > >>from device_get_phy_mode(), and to bail out if device_property_read_u32() > >>returns -ENXIO. > > > >I guess the device_property_read_u32() change needs to be discussed > >separately.. So probably best to fix up the regression to smsc911x > >first. > > > Not sure myself. Jeremy has a point - we don't really know for sure how > safe it is to check for -ENODATA (in addition to -ENXIO). Also, fixing > device_property_read_u32() turned out to be much easier than I thought. > > >>The simpler alternative would be to check the return value from > >>device_property_read_u32() for both -ENXIO and -ENODATA. > >>This would make the code independent of the necessary core changes > >>(which may take a while). I tested this variant, and it works, at least > >>for the non-DT case. > >> > >>Does this make sense ? > > > >Yeh I think that would allow fixing up the smsc911x regression while > >discussing the device_property_read_u32() change. Got a test patch > >for me to try? > > > > You should have two by now to choose from. Acked the second version thanks :) Tony -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -next v2 2/2] smsc911x: Ignore error return from device_get_phy_mode()
* Guenter Roeck [150826 13:24]: > Commit 62ee783bf1f8 ("smsc911x: Fix crash seen if neither ACPI nor OF is > configured or used") introduces an error check for the return value from > device_get_phy_mode() and bails out if there is an error. Unfortunately, > there are configurations where no phy is configured. Those configurations > now fail. > > To fix the problem, accept error returns from device_get_phy_mode(), > and use the return value from device_property_read_u32() to determine > if there is a suitable firmware interface to read the configuration. > > Fixes: 62ee783bf1f8 ("smsc911x: Fix crash seen if neither ACPI nor OF is > configured or used") > Signed-off-by: Guenter Roeck > --- > v2: Dropped RFC > Removed check for -ENODATA > Depends on patch 1/2 > > Tested with non-devicetree configuration. Should be tested with ACPI > and FDT configurations. Thanks this fixes smsc911x regression in Linux next for me with FDT: Tested-by: Tony Lindgren > drivers/net/ethernet/smsc/smsc911x.c | 14 -- > 1 file changed, 8 insertions(+), 6 deletions(-) > > diff --git a/drivers/net/ethernet/smsc/smsc911x.c > b/drivers/net/ethernet/smsc/smsc911x.c > index 6eef3251d833..c8b26259c9cf 100644 > --- a/drivers/net/ethernet/smsc/smsc911x.c > +++ b/drivers/net/ethernet/smsc/smsc911x.c > @@ -2369,23 +2369,25 @@ static int smsc911x_probe_config(struct > smsc911x_platform_config *config, > { > int phy_interface; > u32 width = 0; > + int err; > > phy_interface = device_get_phy_mode(dev); > if (phy_interface < 0) > - return phy_interface; > - > + phy_interface = PHY_INTERFACE_MODE_NA; > config->phy_interface = phy_interface; > > device_get_mac_address(dev, config->mac, ETH_ALEN); > > - device_property_read_u32(dev, "reg-shift", &config->shift); > - > - device_property_read_u32(dev, "reg-io-width", &width); > - if (width == 4) > + err = device_property_read_u32(dev, "reg-io-width", &width); > + if (err == -ENXIO) > + return err; > + if (!err && width == 4) > config->flags |= 
SMSC911X_USE_32BIT; > else > config->flags |= SMSC911X_USE_16BIT; > > + device_property_read_u32(dev, "reg-shift", &config->shift); > + > if (device_property_present(dev, "smsc,irq-active-high")) > config->irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_HIGH; > > -- > 2.1.4 > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net v3] sctp: asconf's process should verify address parameter is in the beginning
On 08/26/2015 05:09 PM, lucien xin wrote: > On Thu, Aug 27, 2015 at 4:59 AM, Marcelo Ricardo Leitner > wrote: >> On Wed, Aug 26, 2015 at 04:42:21PM -0400, Vlad Yasevich wrote: >>> On 08/26/2015 04:35 PM, Xin Long wrote: in sctp_process_asconf(), we get address parameter from the beginning of the addip params. but we never check if it's really there. if the addr param is not there, it still can pass sctp_verify_asconf(), then to be handled by sctp_process_asconf(), it will not be safe. so add a code in sctp_verify_asconf() to check the address parameter is in the beginning, or return false to send abort. v2->v3: * put the check in the loop, add the check for multiple address parameters. >>> >>> >>> Please split the multiple address detection from first address detection. >>> They are 2 different bugs and each one deserves a separate commit and >>> changelog. >> >> See below, thx. >> >>> >>> Thanks >>> -vlad >>> v1->v2: * put the check behind the params' length verify. Signed-off-by: Xin Long Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 06320c8..4068fe1 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3130,14 +3130,24 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_ERR_CAUSE: break; case SCTP_PARAM_IPV4_ADDRESS: + if (addr_param_seen) { + /* peer placed multiple address parameters into +* the same asconf. reject it. 
+*/ + return false; + } if (length != sizeof(sctp_ipv4addr_param_t)) return false; - addr_param_seen = true; + if (param.v == addip->addip_hdr.params) + addr_param_seen = true; break; >> >> I know I had suggested using addr_param_seen to check for multiple >> occurrences, but now realized we can simplify this with something like: >> >> + if (param.v != addip->addip_hdr.params) >> + return false; >> addr_param_seen = true; >> >> Then the check against addr_param_seen is not needed and do both checks >> at once. >> > looks nice, Vlad ? > yes. This is fine too. I think this kills 2 bugs with 1 patch... If you go this route, make sure to document this well in the change log. -vlad -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net v4] sctp: asconf's process should verify address parameter is in the beginning
On 08/26/2015 05:03 PM, Xin Long wrote: > in sctp_process_asconf(), we get address parameter from the beginning of > the addip params. but we never check if it's really there. if the addr > param is not there, it still can pass sctp_verify_asconf(), then to be > handled by sctp_process_asconf(), it will not be safe. > > so add detection in sctp_verify_asconf() to check the address parameter is in > the beginning, or return false to send abort. > > Signed-off-by: Xin Long > Signed-off-by: Vlad Yasevich Acked-by: Vlad Yasevich -vlad > --- > net/sctp/sm_make_chunk.c | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c > index 06320c8..f3fc881 100644 > --- a/net/sctp/sm_make_chunk.c > +++ b/net/sctp/sm_make_chunk.c > @@ -3132,12 +3132,14 @@ bool sctp_verify_asconf(const struct sctp_association > *asoc, > case SCTP_PARAM_IPV4_ADDRESS: > if (length != sizeof(sctp_ipv4addr_param_t)) > return false; > - addr_param_seen = true; > + if (param.v == addip->addip_hdr.params) > + addr_param_seen = true; > break; > case SCTP_PARAM_IPV6_ADDRESS: > if (length != sizeof(sctp_ipv6addr_param_t)) > return false; > - addr_param_seen = true; > + if (param.v == addip->addip_hdr.params) > + addr_param_seen = true; > break; > case SCTP_PARAM_ADD_IP: > case SCTP_PARAM_DEL_IP: > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 net-next 8/8] geneve: Move device hash table to geneve socket.
On Mon, Aug 24, 2015 at 10:43:15AM -0700, Pravin B Shelar wrote: > This change simplifies Geneve Tunnel hash table management. > > Signed-off-by: Pravin B Shelar > Reviewed-by: Jesse Gross Reviewed-by: John W. Linville -- John W. Linville (linvi...@tuxdriver.com): Someday the world will need a hero, and you might be all we have. Be ready. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH, net-next] r8169:Actually from r810x_pll_power_up
Corcodel Marian : > Actually from r810x_pll_power_up function i removed function > r810x_phy_power_up because is two situation.One run from rtl8169_phy_reset > wich already power on interface after reset MII_BMCR and two i placed > supplementary on __rtl8169_resume. r810x_phy_power_up and r810x_pll_power_up are dedicated to 810x chipsets. __rtl8169_resume is used by all chipsets. -- Ueimor C'est systématiquement débile mais c'est toujours inattendu. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net v3] sctp: asconf's process should verify address parameter is in the beginning
On Thu, Aug 27, 2015 at 4:59 AM, Marcelo Ricardo Leitner wrote: > On Wed, Aug 26, 2015 at 04:42:21PM -0400, Vlad Yasevich wrote: >> On 08/26/2015 04:35 PM, Xin Long wrote: >> > in sctp_process_asconf(), we get address parameter from the beginning of >> > the addip params. but we never check if it's really there. if the addr >> > param is not there, it still can pass sctp_verify_asconf(), then to be >> > handled by sctp_process_asconf(), it will not be safe. >> > >> > so add a code in sctp_verify_asconf() to check the address parameter is in >> > the beginning, or return false to send abort. >> > >> > v2->v3: >> > * put the check in the loop, add the check for multiple address >> > parameters. >> >> >> Please split the multiple address detection from first address detection. >> They are 2 different bugs and each one deserves a separate commit and >> changelog. > > See below, thx. > >> >> Thanks >> -vlad >> >> > v1->v2: >> > * put the check behind the params' length verify. >> > >> > Signed-off-by: Xin Long >> > Signed-off-by: Vlad Yasevich >> > --- >> > net/sctp/sm_make_chunk.c | 14 -- >> > 1 file changed, 12 insertions(+), 2 deletions(-) >> > >> > diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c >> > index 06320c8..4068fe1 100644 >> > --- a/net/sctp/sm_make_chunk.c >> > +++ b/net/sctp/sm_make_chunk.c >> > @@ -3130,14 +3130,24 @@ bool sctp_verify_asconf(const struct >> > sctp_association *asoc, >> > case SCTP_PARAM_ERR_CAUSE: >> > break; >> > case SCTP_PARAM_IPV4_ADDRESS: >> > + if (addr_param_seen) { >> > + /* peer placed multiple address parameters into >> > +* the same asconf. reject it. 
>> > +*/ >> > + return false; >> > + } >> > if (length != sizeof(sctp_ipv4addr_param_t)) >> > return false; >> > - addr_param_seen = true; >> > + if (param.v == addip->addip_hdr.params) >> > + addr_param_seen = true; >> > break; > > I know I had suggested using addr_param_seen to check for multiple > occurrences, but now realized we can simplify this with something like: > > + if (param.v != addip->addip_hdr.params) > + return false; > addr_param_seen = true; > > Then the check against addr_param_seen is not needed and do both checks > at once. > looks nice, Vlad ? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net v4] sctp: asconf's process should verify address parameter is in the beginning
in sctp_process_asconf(), we get address parameter from the beginning of the addip params. but we never check if it's really there. if the addr param is not there, it still can pass sctp_verify_asconf(), then to be handled by sctp_process_asconf(), it will not be safe. so add detection in sctp_verify_asconf() to check the address parameter is in the beginning, or return false to send abort. Signed-off-by: Xin Long Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 06320c8..f3fc881 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3132,12 +3132,14 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_IPV4_ADDRESS: if (length != sizeof(sctp_ipv4addr_param_t)) return false; - addr_param_seen = true; + if (param.v == addip->addip_hdr.params) + addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: if (length != sizeof(sctp_ipv6addr_param_t)) return false; - addr_param_seen = true; + if (param.v == addip->addip_hdr.params) + addr_param_seen = true; break; case SCTP_PARAM_ADD_IP: case SCTP_PARAM_DEL_IP: -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 net-next 4/8] geneve: Make dst-port configurable.
On Mon, Aug 24, 2015 at 10:43:11AM -0700, Pravin B Shelar wrote: > Add netlink interface to configure Geneve UDP port number. > So that user can configure it for a Gevene device. > > Signed-off-by: Pravin B Shelar > Reviewed-by: Jesse Gross Acked-by: John W. Linville -- John W. Linville (linvi...@tuxdriver.com): Someday the world will need a hero, and you might be all we have. Be ready. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 net-next 2/8] geneve: Use skb mark and protocol to lookup route.
On Mon, Aug 24, 2015 at 10:43:09AM -0700, Pravin B Shelar wrote: > On packet transmit path geneve need to lookup route. Following > patch improves route lookup using more parameters. > > Signed-off-by: Pravin B Shelar > Reviewed-by: Jesse Gross > Acked-by: Thomas Graf > --- > drivers/net/geneve.c |3 +++ > 1 files changed, 3 insertions(+), 0 deletions(-) > > diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c > index 95e9da0..3c5b2b1 100644 > --- a/drivers/net/geneve.c > +++ b/drivers/net/geneve.c > @@ -202,6 +202,9 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, > struct net_device *dev) > memset(&fl4, 0, sizeof(fl4)); > fl4.flowi4_tos = RT_TOS(tos); > fl4.daddr = geneve->remote.sin_addr.s_addr; > + fl4.flowi4_mark = skb->mark; > + fl4.flowi4_proto = IPPROTO_UDP; > + > rt = ip_route_output_key(geneve->net, &fl4); > if (IS_ERR(rt)) { > netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); Acked-by: John W. Linville -- John W. Linville (linvi...@tuxdriver.com): Someday the world will need a hero, and you might be all we have. Be ready. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net-next] net: sched: consolidate tc_classify{,_compat}
For classifiers getting invoked via tc_classify(), we always need an extra function call into tc_classify_compat(), as both are being exported as symbols and tc_classify() itself doesn't do much except handling of reclassifications when tp->classify() returned with TC_ACT_RECLASSIFY. CBQ and ATM are the only qdiscs that directly call into tc_classify_compat(), all others use tc_classify(). When tc actions are being configured out in the kernel, tc_classify() effectively does nothing besides delegating. We could spare this layer and consolidate both functions. pktgen on single CPU constantly pushing skbs directly into the netif_receive_skb() path with a dummy classifier on ingress qdisc attached, improves slightly from 22.3Mpps to 23.1Mpps. Signed-off-by: Daniel Borkmann --- include/net/pkt_sched.h | 4 +--- net/core/dev.c | 2 +- net/sched/sch_api.c | 55 ++-- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_choke.c| 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_dsmark.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_sfb.c | 2 +- net/sched/sch_sfq.c | 2 +- 16 files changed, 40 insertions(+), 47 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2342bf1..401038d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -110,10 +110,8 @@ static inline void qdisc_run(struct Qdisc *q) __qdisc_run(q); } -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res); int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res); + struct tcf_result *res, bool compat_mode); static inline __be16 tc_skb_protocol(const struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index b1f3f48..7bb24f1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3657,7 +3657,7 @@ static inline struct sk_buff 
*handle_ing(struct sk_buff *skb, skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); qdisc_bstats_cpu_update(cl->q, skb); - switch (tc_classify(skb, cl, &cl_res)) { + switch (tc_classify(skb, cl, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f06aa01..59c227f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1806,51 +1806,46 @@ done: * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) { __be16 protocol = tc_skb_protocol(skb); - int err; +#ifdef CONFIG_NET_CLS_ACT + const struct tcf_proto *old_tp = tp; + int limit = 0; +reclassify: +#endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + int err; + if (tp->protocol != protocol && tp->protocol != htons(ETH_P_ALL)) continue; - err = tp->classify(skb, tp, res); + err = tp->classify(skb, tp, res); +#ifdef CONFIG_NET_CLS_ACT + if (unlikely(err == TC_ACT_RECLASSIFY && +!compat_mode)) + goto reset; +#endif if (err >= 0) return err; } - return -1; -} -EXPORT_SYMBOL(tc_classify_compat); -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - int err = 0; -#ifdef CONFIG_NET_CLS_ACT - const struct tcf_proto *otp = tp; - int limit = 0; -reclassify: -#endif - - err = tc_classify_compat(skb, tp, res); + return -1; #ifdef CONFIG_NET_CLS_ACT - if (err == TC_ACT_RECLASSIFY) { - tp = otp; - - if (unlikely(limit++ >= MAX_REC_LOOP)) { - net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", - tp->q->ops->id, - tp->prio & 0x, - ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - goto reclassify; +reset: + if (unlikely(limit++ >= MAX_REC_LOOP)) { + net_notice_ratelimited("%s: reclassify loop, rule prio %u, " 
+ "protocol %02x\n", tp->q->ops->id, +
Re: [PATCH net v3] sctp: asconf's process should verify address parameter is in the beginning
On Wed, Aug 26, 2015 at 04:42:21PM -0400, Vlad Yasevich wrote: > On 08/26/2015 04:35 PM, Xin Long wrote: > > in sctp_process_asconf(), we get address parameter from the beginning of > > the addip params. but we never check if it's really there. if the addr > > param is not there, it still can pass sctp_verify_asconf(), then to be > > handled by sctp_process_asconf(), it will not be safe. > > > > so add a code in sctp_verify_asconf() to check the address parameter is in > > the beginning, or return false to send abort. > > > > v2->v3: > > * put the check in the loop, add the check for multiple address parameters. > > > Please split the multiple address detection from first address detection. > They are 2 different bugs and each one deserves a separate commit and > changelog. See below, thx. > > Thanks > -vlad > > > v1->v2: > > * put the check behind the params' length verify. > > > > Signed-off-by: Xin Long > > Signed-off-by: Vlad Yasevich > > --- > > net/sctp/sm_make_chunk.c | 14 -- > > 1 file changed, 12 insertions(+), 2 deletions(-) > > > > diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c > > index 06320c8..4068fe1 100644 > > --- a/net/sctp/sm_make_chunk.c > > +++ b/net/sctp/sm_make_chunk.c > > @@ -3130,14 +3130,24 @@ bool sctp_verify_asconf(const struct > > sctp_association *asoc, > > case SCTP_PARAM_ERR_CAUSE: > > break; > > case SCTP_PARAM_IPV4_ADDRESS: > > + if (addr_param_seen) { > > + /* peer placed multiple address parameters into > > +* the same asconf. reject it. 
> > +*/ > > + return false; > > + } > > if (length != sizeof(sctp_ipv4addr_param_t)) > > return false; > > - addr_param_seen = true; > > + if (param.v == addip->addip_hdr.params) > > + addr_param_seen = true; > > break; I know I had suggested using addr_param_seen to check for multiple occurrences, but now realized we can simplify this with something like: + if (param.v != addip->addip_hdr.params) + return false; addr_param_seen = true; Then the check against addr_param_seen is not needed and do both checks at once. Thanks, Marcelo > > case SCTP_PARAM_IPV6_ADDRESS: > > + if (addr_param_seen) > > + return false; > > if (length != sizeof(sctp_ipv6addr_param_t)) > > return false; > > - addr_param_seen = true; > > + if (param.v == addip->addip_hdr.params) > > + addr_param_seen = true; > > break; > > case SCTP_PARAM_ADD_IP: > > case SCTP_PARAM_DEL_IP: > > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -next] smsc911x: Fix crash seen if neither ACPI nor OF is configured or used
Hi Tony, On 08/26/2015 01:16 PM, Tony Lindgren wrote: [ ... ] We may need two separate patches, one to fix up device_property_read_u32() to return -ENXIO, and one to fix smsc911x_probe_config() to ignore the error from device_get_phy_mode(), and to bail out if device_property_read_u32() returns -ENXIO. I guess the device_property_read_u32() change needs to be discussed separately.. So probably best to fix up the regression to smsc911x first. Not sure myself. Jeremy has a point - we don't really know for sure how safe it is to check for -ENODATA (in addition to -ENXIO). Also, fixing device_property_read_u32() turned out to be much easier than I thought. The simpler alternative would be to check the return value from device_property_read_u32() for both -ENXIO and -ENODATA. This would make the code independent of the necessary core changes (which may take a while). I tested this variant, and it works, at least for the non-DT case. Does this make sense ? Yeh I think that would allow fixing up the smsc911x regression while discussing the device_property_read_u32() change. Got a test patch for me to try? You should have two by now to choose from. Guenter -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: ip_rcv_finish() NULL pointer and possibly related Oopses
On Wed, Aug 26, 2015 at 4:49 AM, Chuck Ebbert wrote: > On Wed, 26 Aug 2015 08:46:59 + > Shaun Crampton wrote: > >> Testing our app at scale on Google's GCE, running ~1000 CoreOS hosts: over >> approximately 1 hour, I see about 1 in 50 hosts hit one of the Oopses >> below and then reboot (I'm not sure if the different oopses are related to >> each other). >> >> The app is Project Calico, which is a datacenter networking fabric. >> calico-felix, the process named below, is our per-host agent. The >> per-host agent is responsible for reading the network information from a >> central server and applying "ip route" and "iptables" updates to the >> kernel. We're running on CoreOS, with about 100 docker containers/veths >> pairs running on each host. calico-felix is running inside one of those >> containers. We also run the BIRD BGP stack to redistribute routes around >> the datacenter. The errors happen more frequently while Calico is under >> load. >> >> I'm not sure where to go from here. I can reproduce these issues easily >> at that scale but I haven't managed to boil it down to a small-scale repro >> scenario for further investigation (yet). >> > > What in the world is going on with those call traces? E.g.: > >> [ 4513.712008] >> [ 4513.712008] [] ? ip_rcv_finish+0x81/0x360 >> [ 4513.712008] [] ip_rcv+0x2a4/0x400 >> [ 4513.712008] [] ? inet_del_offload+0x40/0x40 >> [ 4513.712008] [] __netif_receive_skb_core+0x6c3/0x9a0 >> [ 4513.712008] [] ? build_skb+0x17/0x90 >> [ 4513.712008] [] __netif_receive_skb+0x18/0x60 >> [ 4513.712008] [] netif_receive_skb_internal+0x33/0xa0 >> [ 4513.712008] [] netif_receive_skb_sk+0x1c/0x70 >> [ 4513.712008] [] 0xa00f772b >> [ 4513.712008] [] ? 
__netif_receive_skb_core+0x6c3/0x9a0 >> [ 4513.712008] [] 0xa00f7d81 >> [ 4513.712008] [] net_rx_action+0x159/0x340 >> [ 4513.712008] [] __do_softirq+0xf4/0x290 >> [ 4513.712008] [] irq_exit+0xad/0xc0 >> [ 4513.712008] [] do_IRQ+0x5a/0xf0 >> [ 4513.712008] [] common_interrupt+0x6e/0x6e >> [ 4513.712008] > > There are two functions in the call trace that the kernel knows > nothing about. How did they get in there? > > And there is really executable code in there, as can be seen from a > later trace: > >> [ 4123.003006] >> [ 4123.003006] [] nf_iterate+0x57/0x80 >> [ 4123.003006] [] nf_hook_slow+0x97/0x100 >> [ 4123.003006] [] ip_local_deliver+0x92/0xa0 >> [ 4123.003006] [] ? ip_rcv_finish+0x360/0x360 >> [ 4123.003006] [] ip_rcv_finish+0x81/0x360 >> [ 4123.003006] [] ip_rcv+0x2a4/0x400 >> [ 4123.003006] [] ? inet_del_offload+0x40/0x40 >> [ 4123.003006] [] __netif_receive_skb_core+0x6c3/0x9a0 >> [ 4123.003006] [] ? build_skb+0x17/0x90 >> [ 4123.003006] [] __netif_receive_skb+0x18/0x60 >> [ 4123.003006] [] netif_receive_skb_internal+0x33/0xa0 >> [ 4123.003006] [] netif_receive_skb_sk+0x1c/0x70 >> [ 4123.003006] [] 0xa00d472b >> [ 4123.003006] [] 0xa00d4d81 >> [ 4123.003006] [] net_rx_action+0x159/0x340 >> [ 4123.003006] [] __do_softirq+0xf4/0x290 >> [ 4123.003006] [] irq_exit+0xad/0xc0 >> [ 4123.003006] [] do_IRQ+0x5a/0xf0 >> [ 4123.003006] [] common_interrupt+0x6e/0x6e >> [ 4123.003006] >> [ 4123.003006] [] ? __ip_route_output_key+0x31d/0x860 >> [ 4123.003006] [] ? xfrm_lookup_route+0x5/0x70 >> [ 4123.003006] [] ? ip_route_output_flow+0x54/0x60 >> [ 4123.003006] [] ip_queue_xmit+0x36a/0x3d0 >> [ 4123.003006] [] tcp_transmit_skb+0x4b9/0x990 >> [ 4123.003006] [] tcp_write_xmit+0x115/0xe90 >> [ 4123.003006] [] __tcp_push_pending_frames+0x32/0xd0 >> [ 4123.003006] [] tcp_push+0xef/0x120 >> [ 4123.003006] [] tcp_sendmsg+0xc5/0xb20 >> [ 4123.003006] [] ? lock_hrtimer_base.isra.22+0x29/0x50 >> [ 4123.003006] [] inet_sendmsg+0x64/0xa0 >> [ 4123.003006] [] ? 
__fget_light+0x25/0x70 >> [ 4123.003006] [] sock_sendmsg+0x3d/0x50 >> [ 4123.003006] [] SYSC_sendto+0x102/0x1a0 >> [ 4123.003006] [] ? __audit_syscall_entry+0xb4/0x110 >> [ 4123.003006] [] ? do_audit_syscall_entry+0x6c/0x70 >> [ 4123.003006] [] ? >> syscall_trace_enter_phase1+0x103/0x160 >> [ 4123.003006] [] SyS_sendto+0xe/0x10 >> [ 4123.003006] [] system_call_fastpath+0x12/0x71 >> [ 4123.003006] Code: <48> 8b 88 40 03 00 00 e8 1d dd dd ff 5d c3 0f 1f 00 >> 41 83 b9 80 00 >> [ 4123.003006] RIP [] 0xa0233027 >> [ 4123.003006] RSP > > Presumably the same two functions as before (loaded at a different > base address but same offsets, 0xd81 and 0x72b). And then nf_iterate > call into another unknown function, and there really is code there > and it's consistent with the oops. And the kernel thinks it's > outside of any normal text section, so it does not try to dump any > code from before the instruction pointer. > >0: 48 8b 88 40 03 00 00mov0x340(%rax),%rcx >7: e8 1d dd dd ff callq 0xff29 >c: 5d pop%rbp >d: c3 retq > > Did you write your own module loader or something? These are stock k
[PATCH net v3] sctp: do not reset the overall_error_count in SHUTDOWN_RECEIVED state
Commit f8d960524328 ("sctp: Enforce retransmission limit during shutdown") fixed a problem with excessive retransmissions in the SHUTDOWN_PENDING state by not resetting the association overall_error_count. This allowed the association to better enforce assoc.max_retrans limit. However, the same issue still exists when the association is in SHUTDOWN_RECEIVED state. In this state, HB-ACKs will continue to reset the overall_error_count for the association, which would extend the lifetime of the association unnecessarily. This patch solves this by resetting the overall_error_count only when the current state is smaller than SCTP_STATE_SHUTDOWN_PENDING. As a small side-effect, we end up also handling SCTP_STATE_SHUTDOWN_ACK_SENT and SCTP_STATE_SHUTDOWN_SENT states, but they are not really impacted because we disable Heartbeats in those states. Fixes: Commit f8d960524328 ("sctp: Enforce retransmission limit during shutdown") Signed-off-by: Xin Long --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index fef2acd..85e6f03 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -702,7 +702,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, * outstanding data and rely on the retransmission limit be reached * to shutdown the association. */ - if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING) + if (t->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) t->asoc->overall_error_count = 0; /* Clear the hb_sent flag to signal that we had a good -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 net-next 1/8] geneve: Initialize ethernet address in device setup.
On Mon, Aug 24, 2015 at 10:43:08AM -0700, Pravin B Shelar wrote: > Signed-off-by: Pravin B Shelar > Reviewed-by: Jesse Gross > Acked-by: Thomas Graf > --- > drivers/net/geneve.c |4 +--- > 1 files changed, 1 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c > index 897e1a3..95e9da0 100644 > --- a/drivers/net/geneve.c > +++ b/drivers/net/geneve.c > @@ -297,6 +297,7 @@ static void geneve_setup(struct net_device *dev) > > netif_keep_dst(dev); > dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; > + eth_hw_addr_random(dev); > } > > static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { > @@ -364,9 +365,6 @@ static int geneve_newlink(struct net *net, struct > net_device *dev, > return -EBUSY; > } > > - if (tb[IFLA_ADDRESS] == NULL) > - eth_hw_addr_random(dev); > - > err = register_netdevice(dev); > if (err) > return err; Acked-by: John W. Linville -- John W. Linville (linvi...@tuxdriver.com): Someday the world will need a hero, and you might be all we have. Be ready. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH net v3] sctp: asconf's process should verify address parameter is in the beginning
On 08/26/2015 04:35 PM, Xin Long wrote: > in sctp_process_asconf(), we get address parameter from the beginning of > the addip params. but we never check if it's really there. if the addr > param is not there, it still can pass sctp_verify_asconf(), then to be > handled by sctp_process_asconf(), it will not be safe. > > so add a code in sctp_verify_asconf() to check the address parameter is in > the beginning, or return false to send abort. > > v2->v3: > * put the check in the loop, add the check for multiple address parameters. Please split the multiple address detection from first address detection. They are 2 different bugs and each one deserves a separate commit and changelog. Thanks -vlad > v1->v2: > * put the check behind the params' length verify. > > Signed-off-by: Xin Long > Signed-off-by: Vlad Yasevich > --- > net/sctp/sm_make_chunk.c | 14 -- > 1 file changed, 12 insertions(+), 2 deletions(-) > > diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c > index 06320c8..4068fe1 100644 > --- a/net/sctp/sm_make_chunk.c > +++ b/net/sctp/sm_make_chunk.c > @@ -3130,14 +3130,24 @@ bool sctp_verify_asconf(const struct sctp_association > *asoc, > case SCTP_PARAM_ERR_CAUSE: > break; > case SCTP_PARAM_IPV4_ADDRESS: > + if (addr_param_seen) { > + /* peer placed multiple address parameters into > + * the same asconf. reject it. 
> + */ > + return false; > + } > if (length != sizeof(sctp_ipv4addr_param_t)) > return false; > - addr_param_seen = true; > + if (param.v == addip->addip_hdr.params) > + addr_param_seen = true; > break; > case SCTP_PARAM_IPV6_ADDRESS: > + if (addr_param_seen) > + return false; > if (length != sizeof(sctp_ipv6addr_param_t)) > return false; > - addr_param_seen = true; > + if (param.v == addip->addip_hdr.params) > + addr_param_seen = true; > break; > case SCTP_PARAM_ADD_IP: > case SCTP_PARAM_DEL_IP: > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv6 net-next 05/10] openvswitch: Add conntrack action
On Wed, Aug 26, 2015 at 11:31 AM, Joe Stringer wrote: > Expose the kernel connection tracker via OVS. Userspace components can > make use of the CT action to populate the connection state (ct_state) > field for a flow. This state can be subsequently matched. > > Exposed connection states are OVS_CS_F_*: > - NEW (0x01) - Beginning of a new connection. > - ESTABLISHED (0x02) - Part of an existing connection. > - RELATED (0x04) - Related to an established connection. > - INVALID (0x20) - Could not track the connection for this packet. > - REPLY_DIR (0x40) - This packet is in the reply direction for the flow. > - TRACKED (0x80) - This packet has been sent through conntrack. > > When the CT action is executed by itself, it will send the packet > through the connection tracker and populate the ct_state field with one > or more of the connection state flags above. The CT action will always > set the TRACKED bit. > > When the COMMIT flag is passed to the conntrack action, this specifies > that information about the connection should be stored. This allows > subsequent packets for the same (or related) connections to be > correlated with this connection. Sending subsequent packets for the > connection through conntrack allows the connection tracker to consider > the packets as ESTABLISHED, RELATED, and/or REPLY_DIR. > > The CT action may optionally take a zone to track the flow within. This > allows connections with the same 5-tuple to be kept logically separate > from connections in other zones. If the zone is specified, then the > "ct_zone" match field will be subsequently populated with the zone id. > > IP fragments are handled by transparently assembling them as part of the > CT action. The maximum received unit (MRU) size is tracked so that > refragmentation can occur during output. > > IP frag handling contributed by Andy Zhou. 
> > Signed-off-by: Joe Stringer > Signed-off-by: Justin Pettit > Signed-off-by: Andy Zhou > Acked-by: Thomas Graf Acked-by: Pravin B Shelar -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mac80211: Do not use sizeof() on pointer type
On 08/26/2015 10:23 PM, Arend van Spriel wrote: On 08/26/2015 12:22 PM, Thierry Reding wrote: From: Thierry Reding The rate_control_cap_mask() function takes a parameter mcs_mask, which GCC will take to be u8 * even though it was declared with a fixed size. This causes the following warning: net/mac80211/rate.c: In function 'rate_control_cap_mask': net/mac80211/rate.c:719:25: warning: 'sizeof' on array function parameter 'mcs_mask' will return size of 'u8 * {aka unsigned char *}' [-Wsizeof-array-argument] for (i = 0; i < sizeof(mcs_mask); i++) ^ net/mac80211/rate.c:684:10: note: declared here u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN], ^ This can be easily fixed by using the IEEE80211_HT_MCS_MASK_LEN directly within the loop condition. Or use ARRAY_SIZE(mcs_mask). Better not. Missed the 'function parameter' bit. Sorry for the noise. Regards, Arend Regards, Arend Signed-off-by: Thierry Reding --- net/mac80211/rate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 9857693b91ec..9ce8883d5f44 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -716,7 +716,7 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, /* Filter out rates that the STA does not support */ *mask &= sta->supp_rates[sband->band]; -for (i = 0; i < sizeof(mcs_mask); i++) +for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map; -- To unsubscribe from this list: send the line "unsubscribe linux-wireless" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH net v3] sctp: asconf's process should verify address parameter is in the beginning
in sctp_process_asconf(), we get address parameter from the beginning of the addip params. but we never check if it's really there. if the addr param is not there, it still can pass sctp_verify_asconf(), then to be handled by sctp_process_asconf(), it will not be safe. so add a code in sctp_verify_asconf() to check the address parameter is in the beginning, or return false to send abort. v2->v3: * put the check in the loop, add the check for multiple address parameters. v1->v2: * put the check behind the params' length verify. Signed-off-by: Xin Long Signed-off-by: Vlad Yasevich --- net/sctp/sm_make_chunk.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 06320c8..4068fe1 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3130,14 +3130,24 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_ERR_CAUSE: break; case SCTP_PARAM_IPV4_ADDRESS: + if (addr_param_seen) { + /* peer placed multiple address parameters into +* the same asconf. reject it. +*/ + return false; + } if (length != sizeof(sctp_ipv4addr_param_t)) return false; - addr_param_seen = true; + if (param.v == addip->addip_hdr.params) + addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: + if (addr_param_seen) + return false; if (length != sizeof(sctp_ipv6addr_param_t)) return false; - addr_param_seen = true; + if (param.v == addip->addip_hdr.params) + addr_param_seen = true; break; case SCTP_PARAM_ADD_IP: case SCTP_PARAM_DEL_IP: -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mac80211: Do not use sizeof() on pointer type
On 08/26/2015 12:22 PM, Thierry Reding wrote: From: Thierry Reding The rate_control_cap_mask() function takes a parameter mcs_mask, which GCC will take to be u8 * even though it was declared with a fixed size. This causes the following warning: net/mac80211/rate.c: In function 'rate_control_cap_mask': net/mac80211/rate.c:719:25: warning: 'sizeof' on array function parameter 'mcs_mask' will return size of 'u8 * {aka unsigned char *}' [-Wsizeof-array-argument] for (i = 0; i < sizeof(mcs_mask); i++) ^ net/mac80211/rate.c:684:10: note: declared here u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN], ^ This can be easily fixed by using the IEEE80211_HT_MCS_MASK_LEN directly within the loop condition. Or use ARRAY_SIZE(mcs_mask). Regards, Arend Signed-off-by: Thierry Reding --- net/mac80211/rate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 9857693b91ec..9ce8883d5f44 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -716,7 +716,7 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata, /* Filter out rates that the STA does not support */ *mask &= sta->supp_rates[sband->band]; - for (i = 0; i < sizeof(mcs_mask); i++) + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map; -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html