[PATCH net-next] hv_netvsc: report stop_queue and wake_queue
Report the numbers of events for stop_queue and wake_queue in ethtool stats. Example: ethtool -S eth0 NIC statistics: ... stop_queue: 7 wake_queue: 7 ... Signed-off-by: Simon Xiao <six...@microsoft.com> Reviewed-by: Haiyang Zhang <haiya...@microsoft.com> --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc.c | 12 ++-- drivers/net/hyperv/netvsc_drv.c | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 5176be7..6f550e1 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -686,6 +686,8 @@ struct netvsc_ethtool_stats { unsigned long tx_busy; unsigned long tx_send_full; unsigned long rx_comp_busy; + unsigned long stop_queue; + unsigned long wake_queue; }; struct netvsc_vf_pcpu_stats { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index b0d323e..6e51949 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -609,6 +609,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, { struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device *ndev = hv_get_drvdata(device); + struct net_device_context *ndev_ctx = netdev_priv(ndev); struct vmbus_channel *channel = device->channel; u16 q_idx = 0; int queue_sends; @@ -643,8 +644,10 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && (hv_ringbuf_avail_percent(>outbound) > RING_AVAIL_PERCENT_HIWATER || -queue_sends < 1)) +queue_sends < 1)) { netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); + ndev_ctx->eth_stats.wake_queue++; + } } static void netvsc_send_completion(struct netvsc_device *net_device, @@ -749,6 +752,7 @@ static inline int netvsc_send_pkt( _device->chan_table[packet->q_idx]; struct vmbus_channel *out_channel = nvchan->channel; struct net_device *ndev = hv_get_drvdata(device); + struct 
net_device_context *ndev_ctx = netdev_priv(ndev); struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); u64 req_id; int ret; @@ -789,12 +793,16 @@ static inline int netvsc_send_pkt( if (ret == 0) { atomic_inc_return(>queue_sends); - if (ring_avail < RING_AVAIL_PERCENT_LOWATER) + if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { netif_tx_stop_queue(txq); + ndev_ctx->eth_stats.stop_queue++; + } } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); + ndev_ctx->eth_stats.stop_queue++; if (atomic_read(>queue_sends) < 1) { netif_tx_wake_queue(txq); + ndev_ctx->eth_stats.wake_queue++; ret = -ENOSPC; } } else { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e9d54c9..f300ae6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1126,6 +1126,8 @@ static const struct { { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) }, { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) }, + { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) }, + { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) }, }, vf_stats[] = { { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) }, { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) }, -- 2.7.4
[PATCH net-next,1/1] tools: hv: ignore a NIC if it has been configured
Let bondvf.sh ignore this NIC if it has been configured, to prevent user configuration from being overwritten unexpectedly. Signed-off-by: Simon Xiao <six...@microsoft.com> --- tools/hv/bondvf.sh | 27 +-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh index 89b2506..80f1028 100755 --- a/tools/hv/bondvf.sh +++ b/tools/hv/bondvf.sh @@ -211,6 +211,30 @@ function create_bond { echo $'\nBond name:' $bondname + if [ $distro == ubuntu ] + then + local mainfn=$cfgdir/interfaces + local s="^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+${bondname}" + + grep -E "$s" $mainfn + if [ $? -eq 0 ] + then + echo "WARNING: ${bondname} has been configured already" + return + fi + elif [ $distro == redhat ] || [ $distro == suse ] + then + local fn=$cfgdir/ifcfg-$bondname + if [ -f $fn ] + then + echo "WARNING: ${bondname} has been configured already" + return + fi + else + echo "Unsupported Distro: ${distro}" + return + fi + echo configuring $primary create_eth_cfg_pri_$distro $primary $bondname @@ -219,8 +243,6 @@ function create_bond { echo creating: $bondname with primary slave: $primary create_bond_cfg_$distro $bondname $primary $secondary - - let bondcnt=bondcnt+1 } for (( i=0; i < $eth_cnt-1; i++ )) @@ -228,5 +250,6 @@ do if [ -n "${list_match[$i]}" ] then create_bond ${list_eth[$i]} ${list_match[$i]} + let bondcnt=bondcnt+1 fi done -- 2.7.4
[PATCH net-next, 2/2] tools: hv: set hotplug for VF on Suse
On HyperV, the VF interface can be offered by a host at any time. Mark the VF interface as hotplug, to make sure it will be brought up automatically when it is registered. Signed-off-by: Simon Xiao <six...@microsoft.com> Reviewed-by: Haiyang Zhang <haiya...@microsoft.com> --- tools/hv/bondvf.sh | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh index 8abd46e..89b2506 100755 --- a/tools/hv/bondvf.sh +++ b/tools/hv/bondvf.sh @@ -170,7 +170,11 @@ function create_eth_cfg_suse { } function create_eth_cfg_pri_suse { - create_eth_cfg_suse $1 + local fn=$cfgdir/ifcfg-$1 + + rm -f $fn + echo BOOTPROTO=none >>$fn + echo STARTMODE=hotplug >>$fn } function create_bond_cfg_suse { -- 2.7.4
[PATCH net-next, 1/2] tools: hv: set allow-hotplug for VF on Ubuntu
On HyperV, the VF interface can be offered by a host at any time. Mark the VF interface as hotplug, to make sure it will be brought up automatically when it is registered. Signed-off-by: Simon Xiao <six...@microsoft.com> Reviewed-by: Haiyang Zhang <haiya...@microsoft.com> --- tools/hv/bondvf.sh | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh index 112deba..8abd46e 100755 --- a/tools/hv/bondvf.sh +++ b/tools/hv/bondvf.sh @@ -134,7 +134,6 @@ function create_eth_cfg_ubuntu { local fn=$cfgdir/interfaces del_eth_cfg_ubuntu $1 - echo $'\n'auto $1 >>$fn echo iface $1 inet manual >>$fn echo bond-master $2 >>$fn @@ -143,7 +142,10 @@ function create_eth_cfg_ubuntu { function create_eth_cfg_pri_ubuntu { local fn=$cfgdir/interfaces - create_eth_cfg_ubuntu $1 $2 + del_eth_cfg_ubuntu $1 + echo $'\n'allow-hotplug $1 >>$fn + echo iface $1 inet manual >>$fn + echo bond-master $2 >>$fn echo bond-primary $1 >>$fn } -- 2.7.4
RE: [PATCH 1/1] hv_netvsc: fix a netvsc stats typo
Please ignore this patch. I will resubmit it to net-next. > -Original Message- > From: Simon Xiao [mailto:six...@microsoft.com] > Sent: Tuesday, February 7, 2017 10:03 AM > To: KY Srinivasan <k...@microsoft.com>; Haiyang Zhang > <haiya...@microsoft.com>; Stephen Hemminger > <sthem...@microsoft.com>; de...@linuxdriverproject.org; > netdev@vger.kernel.org; linux-ker...@vger.kernel.org > Cc: Simon Xiao <six...@microsoft.com> > Subject: [PATCH 1/1] hv_netvsc: fix a netvsc stats typo > > [This sender failed our fraud detection checks and may not be who they > appear to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing] > > Now, return the correct tx_errors stats in netvsc. > > Signed-off-by: Simon Xiao <six...@microsoft.com> > Reviewed-by: Haiyang Zhang <haiya...@microsoft.com> > --- > drivers/net/hyperv/netvsc_drv.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/net/hyperv/netvsc_drv.c > b/drivers/net/hyperv/netvsc_drv.c > index 72b0c1f..725ac19 100644 > --- a/drivers/net/hyperv/netvsc_drv.c > +++ b/drivers/net/hyperv/netvsc_drv.c > @@ -920,7 +920,7 @@ static void netvsc_get_stats64(struct net_device *net, > } > > t->tx_dropped = net->stats.tx_dropped; > - t->tx_errors= net->stats.tx_dropped; > + t->tx_errors= net->stats.tx_errors; > > t->rx_dropped = net->stats.rx_dropped; > t->rx_errors= net->stats.rx_errors; > -- > 2.7.4
[PATCH net-next] hv_netvsc: add ethtool support for set and get of settings
This patch allows the user to set and retrieve speed and duplex of the hv_netvsc device via ethtool. Example: $ ethtool eth0 Settings for eth0: ... Speed: Unknown! Duplex: Unknown! (255) ... $ ethtool -s eth0 speed 1000 duplex full $ ethtool eth0 Settings for eth0: ... Speed: 1000Mb/s Duplex: Full ... This is based on patches by Roopa Prabhu and Nikolay Aleksandrov. Signed-off-by: Simon Xiao <six...@microsoft.com> --- drivers/net/hyperv/hyperv_net.h | 4 +++ drivers/net/hyperv/netvsc_drv.c | 56 + 2 files changed, 60 insertions(+) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index fcb92c0..b4c6878 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -658,6 +658,10 @@ struct net_device_context { struct netvsc_stats __percpu *tx_stats; struct netvsc_stats __percpu *rx_stats; + + /* Ethtool settings */ + u8 duplex; + u32 speed; }; /* Per netvsc device */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 202e2b1..e703b9a 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -799,6 +799,58 @@ static int netvsc_set_channels(struct net_device *net, goto do_set; } +static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd) +{ + struct ethtool_cmd diff1 = *cmd; + struct ethtool_cmd diff2 = {}; + + ethtool_cmd_speed_set(, 0); + diff1.duplex = 0; + /* advertising and cmd are usually set */ + diff1.advertising = 0; + diff1.cmd = 0; + /* We set port to PORT_OTHER */ + diff2.port = PORT_OTHER; + + return !memcmp(, , sizeof(diff1)); +} + +static void netvsc_init_settings(struct net_device *dev) +{ + struct net_device_context *ndc = netdev_priv(dev); + + ndc->speed = SPEED_UNKNOWN; + ndc->duplex = DUPLEX_UNKNOWN; +} + +static int netvsc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct net_device_context *ndc = netdev_priv(dev); + + ethtool_cmd_speed_set(cmd, ndc->speed); + cmd->duplex = ndc->duplex; + cmd->port 
= PORT_OTHER; + + return 0; +} + +static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct net_device_context *ndc = netdev_priv(dev); + u32 speed; + + speed = ethtool_cmd_speed(cmd); + if (!ethtool_validate_speed(speed) || + !ethtool_validate_duplex(cmd->duplex) || + !netvsc_validate_ethtool_ss_cmd(cmd)) + return -EINVAL; + + ndc->speed = speed; + ndc->duplex = cmd->duplex; + + return 0; +} + static int netvsc_change_mtu(struct net_device *ndev, int mtu) { struct net_device_context *ndevctx = netdev_priv(ndev); @@ -923,6 +975,8 @@ static const struct ethtool_ops ethtool_ops = { .get_channels = netvsc_get_channels, .set_channels = netvsc_set_channels, .get_ts_info= ethtool_op_get_ts_info, + .get_settings = netvsc_get_settings, + .set_settings = netvsc_set_settings, }; static const struct net_device_ops device_ops = { @@ -1112,6 +1166,8 @@ static int netvsc_probe(struct hv_device *dev, netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); + netvsc_init_settings(net); + ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); -- 2.5.0
[PATCH net-next] hv_netvsc: add software transmit timestamp support
Enable skb_tx_timestamp in hyperv netvsc. Signed-off-by: Simon Xiao <six...@microsoft.com> Reviewed-by: K. Y. Srinivasan <k...@microsoft.com> Reviewed-by: Haiyang Zhang <haiya...@microsoft.com> --- drivers/net/hyperv/netvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c72e5b8..202e2b1 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -550,6 +550,8 @@ do_send: packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, skb, packet, ); + /* timestamp packet in software */ + skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, , skb); @@ -920,6 +922,7 @@ static const struct ethtool_ops ethtool_ops = { .get_link = ethtool_op_get_link, .get_channels = netvsc_get_channels, .set_channels = netvsc_set_channels, + .get_ts_info= ethtool_op_get_ts_info, }; static const struct net_device_ops device_ops = { -- 2.5.0
[PATCH V2 net-next] hv_netvsc: cleanup netdev feature flags for netvsc
1. Adding NETIF_F_TSO6 feature flag; 2. Adding NETIF_F_HW_CSUM. NETIF_F_IPV6_CSUM and NETIF_F_IP_CSUM are being deprecated; 3. Cleanup the coding style of flag assignment by using macro. Signed-off-by: Simon Xiao <six...@microsoft.com> Reviewed-by: K. Y. Srinivasan <k...@microsoft.com> Reviewed-by: Haiyang Zhang <haiya...@microsoft.com> --- drivers/net/hyperv/netvsc_drv.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 1d3a665..c72e5b8 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -43,6 +43,11 @@ #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) +#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ +NETIF_F_SG | \ +NETIF_F_TSO | \ +NETIF_F_TSO6 | \ +NETIF_F_HW_CSUM) static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); @@ -1081,10 +1086,8 @@ static int netvsc_probe(struct hv_device *dev, net->netdev_ops = _ops; - net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_TSO; - net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_TSO; + net->hw_features = NETVSC_HW_FEATURES; + net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; net->ethtool_ops = _ops; SET_NETDEV_DEV(net, >device); -- 2.5.0
[PATCH net-next] hv_netvsc: Add feature flags NETIF_F_IPV6_CSUM and NETIF_F_TSO6 for netvsc
1. Adding NETIF_F_IPV6_CSUM and NETIF_F_TSO6 feature flags which are supported by Hyper-V platform. 2. Cleanup the coding style of flag assignment by using macro. Signed-off-by: Simon Xiao <six...@microsoft.com> Reviewed-by: K. Y. Srinivasan <k...@microsoft.com> Reviewed-by: Haiyang Zhang <haiya...@microsoft.com> --- drivers/net/hyperv/netvsc_drv.c | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 1d3a665..0cde741 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -43,6 +43,12 @@ #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) +#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ +NETIF_F_SG | \ +NETIF_F_TSO | \ +NETIF_F_TSO6 | \ +NETIF_F_IP_CSUM | \ +NETIF_F_IPV6_CSUM) static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); @@ -1081,10 +1087,8 @@ static int netvsc_probe(struct hv_device *dev, net->netdev_ops = _ops; - net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_TSO; - net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_TSO; + net->hw_features = NETVSC_HW_FEATURES; + net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; net->ethtool_ops = _ops; SET_NETDEV_DEV(net, >device); -- 2.5.0
RE: linux-next network throughput performance regression
Thanks, Eric, for providing the data. I am looping in Tom (as I am looking into his recent patches) and Olaf (from SUSE). So, if I understand it correctly, you are running netperf with a single TCP connection, and you got ~26Gbps initially and ~30Gbps after tuning tx-usecs and tx-frames. Do you have a baseline in your environment for the best (peak) throughput? Again, in my environment (SLES bare metal), if we use the SLES12 default kernel as a baseline, we see a significant performance drop (10% ~ 50%) on the latest linux-next kernel. I will certainly try the same test on net-next soon and post the results here later. Thanks, Simon > -Original Message- > From: Eric Dumazet [mailto:eric.duma...@gmail.com] > Sent: Saturday, November 7, 2015 11:50 AM > To: David Ahern <d...@cumulusnetworks.com> > Cc: Simon Xiao <six...@microsoft.com>; de...@linuxdriverproject.org; > netdev@vger.kernel.org; linux-ker...@vger.kernel.org; David Miller > <da...@davemloft.net>; KY Srinivasan <k...@microsoft.com>; Haiyang > Zhang <haiya...@microsoft.com> > Subject: Re: linux-next network throughput performance regression > > On Sat, 2015-11-07 at 11:35 -0800, Eric Dumazet wrote: > > On Fri, 2015-11-06 at 14:30 -0700, David Ahern wrote: > > > On 11/6/15 2:18 PM, Simon Xiao wrote: > > > > The .config file used to build linux-next kernel is attached to this > > > > mail. > > > > > > Thanks. > > > > > > Failed to notice this on the first response; my brain filled in. Why > > > linux-next tree? Can you try net-next which is more relevant for > > > this mailing list, post the top commit id and config file used? > > > > Throughput on a single TCP flow for a 40G NIC can be tricky to tune. > > > > Make sure IRQ are properly setup/balanced, as I know that IRQ names > > were changed recently and your scripts might have not noticed... > > > > Also "ethtool -c eth0" might show very different interrupt coalescing > > params ?
> > > > I too have a Mellanox 40Gb in my lab and saw no difference in > > performance with recent kernels. > > > > Of course, a simple "perf record -a -g sleep 4 ; perf report" might > > point to some obvious issue. Like unexpected segmentation in case of > > forwarding... > > > > > > I did a test with current net tree on both sender and receiver > > lpaa23:~# ./netperf -H 10.246.7.152 > MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to > 10.246.7.152 () port 0 AF_INET > Recv SendSend > Socket Socket Message Elapsed > Size SizeSize Time Throughput > bytes bytes bytessecs.10^6bits/sec > > 87380 16384 1638410.0026864.98 > lpaa23:~# ethtool -c eth1 > Coalesce parameters for eth1: > Adaptive RX: on TX: off > stats-block-usecs: 0 > sample-interval: 0 > pkt-rate-low: 40 > pkt-rate-high: 45 > > rx-usecs: 16 > rx-frames: 44 > rx-usecs-irq: 0 > rx-frames-irq: 0 > > tx-usecs: 16 > tx-frames: 16 > tx-usecs-irq: 0 > tx-frames-irq: 256 > > rx-usecs-low: 0 > rx-frame-low: 0 > tx-usecs-low: 0 > tx-frame-low: 0 > > rx-usecs-high: 128 > rx-frame-high: 0 > tx-usecs-high: 0 > tx-frame-high: 0 > > lpaa23:~# ethtool -C eth1 tx-usecs 4 tx-frames 4 > lpaa23:~# ./netperf -H > 10.246.7.152 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 > AF_INET to > 10.246.7.152 () port 0 AF_INET > Recv SendSend > Socket Socket Message Elapsed > Size SizeSize Time Throughput > bytes bytes bytessecs.10^6bits/sec > > 87380 16384 1638410.0030206.27 >
linux-next network throughput performance regression
I compared the network throughput performance on SLES12 bare metal servers between the SLES12 default kernel and the latest linux-next (2015-11-05) kernel. Based on the test results, I suspect that a network regression exists on linux-next over the 40G Ethernet network: a) iperf3 reports 50% performance drop with single TCP stream on latest linux-next; b) iperf3 reports 10% ~ 30% performance drop with 2 to 128 TCP streams on latest linux-next; Another throughput benchmarking tool (ntttcp-for-linux) test result is also listed at the end of the email for reference. Server configuration: -- Two servers (one client and one server, cross linked by 40G Ethernet), which have: a) CPU: Intel(R) Xeon(R) CPU E5-2667 v3 @ 3.20GHz, 2 sockets, 16 CPUs, cache size : 20480 KB b) Memory: 64 GB c) Ethernet controller: Mellanox Technologies MT27520 Family [ConnectX-3 Pro], 40G Ethernet, default driver Test with iperf3: -- iperf3: https://github.com/esnet/iperf a) SLES12 default kernel, network throughput tested by iperf3: Test Connections 1 2 4 8 16 32 64 128 Throughput (Gbps) 36.7 37.3 37.6 37.7 37.7 37.7 37.7 25.7 b) SLES12 + Linux-Next 20151105, network throughput tested by iperf3: Test Connections 1 2 4 8 16 32 64 128 Throughput (Gbps) 18.2 32.2 34.6 32.8 27.6 32.0 27.0 21.3 Percentage dropped -50% -14% -8% -13% -27% -15% -28% -17% Test with ntttcp-for-linux: -- ntttcp-for-linux: https://github.com/Microsoft/ntttcp-for-linux a) SLES12 default kernel, network throughput tested by ntttcp-for-linux: Test Connections 1 2 4 8 16 32 64 128 256 512 Throughput (Gbps) 36.19 37.29 37.67 37.68 37.7 37.72 37.74 37.76 37.81 37.9 b) SLES12 + Linux-Next 20151105, network throughput tested by ntttcp-for-linux: Test Connections 1 2 4 8 16 32 64 128 256 512 Throughput (Gbps) 28.12 34.01 37.6 36.53 32.94 33.07 33.63 33.44 33.83 34.42 Percentage dropped -22% -9% 0% -3% -13% -12% -11% -11% -11% -9% -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More
majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH net-next,v2,1/1] hv_netvsc: introduce netif-msg into netvsc module
-Original Message- From: Joe Perches [mailto:j...@perches.com] Sent: Friday, April 24, 2015 1:29 PM To: Simon Xiao Cc: KY Srinivasan; Haiyang Zhang; de...@linuxdriverproject.org; netdev@vger.kernel.org; linux-ker...@vger.kernel.org Subject: Re: [PATCH net-next,v2,1/1] hv_netvsc: introduce netif-msg into netvsc module On Fri, 2015-04-24 at 11:34 -0700, six...@microsoft.com wrote: From: Simon Xiao six...@microsoft.com 1. Introduce netif-msg to netvsc to control debug logging output and keep msg_enable in netvsc_device_context so that it is kept persistently. 2. Only call dump_rndis_message() when NETIF_MSG_RX_ERR or above is specified in netvsc module debug param. In non-debug mode, in current code, dump_rndis_message() will not dump anything but it still initialize some local variables and process the switch logic which is unnecessary, especially in high network throughput situation. [] diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c [] @@ -888,6 +891,11 @@ static int netvsc_probe(struct hv_device *dev, net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; + net_device_ctx->msg_enable = netif_msg_init(debug, default_msg); + if (netif_msg_probe(net_device_ctx)) + netdev_dbg(net, "netvsc msg_enable: %d", + net_device_ctx->msg_enable); Please use newlines to terminate formats. It helps prevent log content interleaving when multiple processes are emitting output at the same time. This could be shortened to use netif_level like: netif_dbg(net_device_ctx, probe, net, "netvsc_msg_enable: %d\n", net_device_ctx->msg_enable); Thanks, Joe. I would like to leave this to my next patch, as some other places in netvsc (rndis_filter.c) have the same usage. I would like to fix them in one patch to make them consistent. Thanks, Simon -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 1/1] v2: Driver: hv: netvsc: call dump_rndis_message() only in netvsc debug mode
Sorry - this patch should be sent to net-next so please ignore it. Thanks, Simon -Original Message- From: six...@microsoft.com [mailto:six...@microsoft.com] Sent: Tuesday, April 21, 2015 2:44 PM To: KY Srinivasan; Haiyang Zhang; netdev@vger.kernel.org; linux-ker...@vger.kernel.org Cc: Simon Xiao Subject: [PATCH 1/1] v2: Driver: hv: netvsc: call dump_rndis_message() only in netvsc debug mode From: Simon Xiao six...@microsoft.com Signed-off-by: Simon Xiao six...@microsoft.com --- drivers/net/hyperv/hyperv_net.h | 3 +++ drivers/net/hyperv/netvsc_drv.c | 8 drivers/net/hyperv/rndis_filter.c | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a10b316..c9be35e 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -28,6 +28,9 @@ #include linux/hyperv.h #include linux/rndis.h +/* flag for netvsc debug mode */ +extern int debug_mode; + /* RSS related */ #define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */ #define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index a3a9d38..7c41864 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -52,6 +52,10 @@ static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, Ring buffer size (# of pages)); +int debug_mode = 0; +module_param(debug_mode, int, S_IRUGO); MODULE_PARM_DESC(debug_mode, +debug mode: zero(0) for non-debug mode; non-zero for debug mode); + static void do_set_multicast(struct work_struct *w) { struct net_device_context *ndevctx = @@ -999,6 +1003,10 @@ static int __init netvsc_drv_init(void) pr_info(Increased ring_size to %d (min allowed)\n, ring_size); } + + if (debug_mode != 0) + pr_info(Run netvsc in debug mode); + return vmbus_driver_register(netvsc_drv); } diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c index 0d92efe..a3f43f6 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -429,7 +429,8 @@ int rndis_filter_receive(struct hv_device *dev, rndis_msg = pkt-data; - dump_rndis_message(dev, rndis_msg); + if (debug_mode != 0) + dump_rndis_message(dev, rndis_msg); switch (rndis_msg-ndis_msg_type) { case RNDIS_MSG_PACKET: -- 1.8.5.2 -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH net-next,1/1] hv_netvsc: call dump_rndis_message() only in netvsc debug mode
-Original Message- From: David Miller [mailto:da...@davemloft.net] Sent: Tuesday, April 21, 2015 2:49 PM To: Simon Xiao Cc: KY Srinivasan; Haiyang Zhang; de...@linuxdriverproject.org; netdev@vger.kernel.org; linux-ker...@vger.kernel.org Subject: Re: [PATCH net-next,1/1] hv_netvsc: call dump_rndis_message() only in netvsc debug mode From: six...@microsoft.com Date: Tue, 21 Apr 2015 15:58:05 -0700 From: Simon Xiao six...@microsoft.com Signed-off-by: Simon Xiao six...@microsoft.com Reviewed-by: K. Y. Srinivasan k...@microsoft.com Reviewed-by: Haiyang Zhang haiya...@microsoft.com I just gave you feedback on this patch in response to your original submission, do not ignore it. Thanks for your feedback, David. In the current netvsc driver, dump_rndis_message() is called for each packet received to try to dump the rndis packet information via netdev_dbg(). In non-debug mode, dump_rndis_message() will not dump anything, but it still initializes some local variables and processes its switch logic, which is unnecessary, especially in high network throughput situations. My change adds a run-time config flag to control the execution of dump_rndis_message() and avoid the above unnecessary cost in non-debug mode. By default the driver runs in non-debug mode, and rndis_filter_receive() will not call dump_rndis_message(), which saves the above extra cost for each packet received. -- To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html